We shall perform data import and data inspection for performing analysis
import pandas as pd
sdata = pd.read_csv("summer.csv")
wdata = pd.read_csv("winter.csv")
dataDic = pd.read_csv("dictionary.csv")
sdata
| Year | City | Sport | Discipline | Athlete | Country | Gender | Event | Medal | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1896 | Athens | Aquatics | Swimming | HAJOS, Alfred | HUN | Men | 100M Freestyle | Gold |
| 1 | 1896 | Athens | Aquatics | Swimming | HERSCHMANN, Otto | AUT | Men | 100M Freestyle | Silver |
| 2 | 1896 | Athens | Aquatics | Swimming | DRIVAS, Dimitrios | GRE | Men | 100M Freestyle For Sailors | Bronze |
| 3 | 1896 | Athens | Aquatics | Swimming | MALOKINIS, Ioannis | GRE | Men | 100M Freestyle For Sailors | Gold |
| 4 | 1896 | Athens | Aquatics | Swimming | CHASAPIS, Spiridon | GRE | Men | 100M Freestyle For Sailors | Silver |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 31160 | 2012 | London | Wrestling | Wrestling Freestyle | JANIKOWSKI, Damian | POL | Men | Wg 84 KG | Bronze |
| 31161 | 2012 | London | Wrestling | Wrestling Freestyle | REZAEI, Ghasem Gholamreza | IRI | Men | Wg 96 KG | Gold |
| 31162 | 2012 | London | Wrestling | Wrestling Freestyle | TOTROV, Rustam | RUS | Men | Wg 96 KG | Silver |
| 31163 | 2012 | London | Wrestling | Wrestling Freestyle | ALEKSANYAN, Artur | ARM | Men | Wg 96 KG | Bronze |
| 31164 | 2012 | London | Wrestling | Wrestling Freestyle | LIDBERG, Jimmy | SWE | Men | Wg 96 KG | Bronze |
31165 rows × 9 columns
sdata.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 31165 entries, 0 to 31164 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Year 31165 non-null int64 1 City 31165 non-null object 2 Sport 31165 non-null object 3 Discipline 31165 non-null object 4 Athlete 31165 non-null object 5 Country 31161 non-null object 6 Gender 31165 non-null object 7 Event 31165 non-null object 8 Medal 31165 non-null object dtypes: int64(1), object(8) memory usage: 2.1+ MB
wdata
| Year | City | Sport | Discipline | Athlete | Country | Gender | Event | Medal | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1924 | Chamonix | Biathlon | Biathlon | BERTHET, G. | FRA | Men | Military Patrol | Bronze |
| 1 | 1924 | Chamonix | Biathlon | Biathlon | MANDRILLON, C. | FRA | Men | Military Patrol | Bronze |
| 2 | 1924 | Chamonix | Biathlon | Biathlon | MANDRILLON, Maurice | FRA | Men | Military Patrol | Bronze |
| 3 | 1924 | Chamonix | Biathlon | Biathlon | VANDELLE, André | FRA | Men | Military Patrol | Bronze |
| 4 | 1924 | Chamonix | Biathlon | Biathlon | AUFDENBLATTEN, Adolf | SUI | Men | Military Patrol | Gold |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 5765 | 2014 | Sochi | Skiing | Snowboard | JONES, Jenny | GBR | Women | Slopestyle | Bronze |
| 5766 | 2014 | Sochi | Skiing | Snowboard | ANDERSON, Jamie | USA | Women | Slopestyle | Gold |
| 5767 | 2014 | Sochi | Skiing | Snowboard | MALTAIS, Dominique | CAN | Women | Snowboard Cross | Silver |
| 5768 | 2014 | Sochi | Skiing | Snowboard | SAMKOVA, Eva | CZE | Women | Snowboard Cross | Gold |
| 5769 | 2014 | Sochi | Skiing | Snowboard | TRESPEUCH, Chloe | FRA | Women | Snowboard Cross | Bronze |
5770 rows × 9 columns
wdata.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5770 entries, 0 to 5769 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Year 5770 non-null int64 1 City 5770 non-null object 2 Sport 5770 non-null object 3 Discipline 5770 non-null object 4 Athlete 5770 non-null object 5 Country 5770 non-null object 6 Gender 5770 non-null object 7 Event 5770 non-null object 8 Medal 5770 non-null object dtypes: int64(1), object(8) memory usage: 405.8+ KB
dataDic
| Country | Code | Population | GDP per Capita | |
|---|---|---|---|---|
| 0 | Afghanistan | AFG | 32526562.0 | 594.323081 |
| 1 | Albania | ALB | 2889167.0 | 3945.217582 |
| 2 | Algeria | ALG | 39666519.0 | 4206.031232 |
| 3 | American Samoa* | ASA | 55538.0 | NaN |
| 4 | Andorra | AND | 70473.0 | NaN |
| ... | ... | ... | ... | ... |
| 196 | Vietnam | VIE | 91703800.0 | 2111.138024 |
| 197 | Virgin Islands* | ISV | 103574.0 | NaN |
| 198 | Yemen | YEM | 26832215.0 | 1406.291651 |
| 199 | Zambia | ZAM | 16211767.0 | 1304.879014 |
| 200 | Zimbabwe | ZIM | 15602751.0 | 924.143819 |
201 rows × 4 columns
dataDic.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 201 entries, 0 to 200 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Country 201 non-null object 1 Code 201 non-null object 2 Population 196 non-null float64 3 GDP per Capita 176 non-null float64 dtypes: float64(2), object(2) memory usage: 6.4+ KB
dataDic[dataDic.isnull().any(axis = 1)]
| Country | Code | Population | GDP per Capita | |
|---|---|---|---|---|
| 3 | American Samoa* | ASA | 55538.0 | NaN |
| 4 | Andorra | AND | 70473.0 | NaN |
| 9 | Aruba* | ARU | 103889.0 | NaN |
| 20 | Bermuda* | BER | 65235.0 | NaN |
| 27 | British Virgin Islands | IVB | 30117.0 | NaN |
| 36 | Cayman Islands* | CAY | 59967.0 | NaN |
| 45 | Cook Islands | COK | NaN | NaN |
| 49 | Cuba | CUB | 11389562.0 | NaN |
| 61 | Eritrea | ERI | NaN | NaN |
| 74 | Guam | GUM | 169885.0 | NaN |
| 86 | Iran | IRI | 79109272.0 | NaN |
| 96 | Korea, North | PRK | 25155317.0 | NaN |
| 105 | Libya | LBA | 6278438.0 | NaN |
| 106 | Liechtenstein | LIE | 37531.0 | NaN |
| 116 | Mauritania | MTN | 4067564.0 | NaN |
| 121 | Monaco | MON | 37731.0 | NaN |
| 130 | Netherlands Antilles* | AHO | NaN | NaN |
| 139 | Palestine, Occupied Territories | PLE | NaN | NaN |
| 141 | Papua New Guinea | PNG | 7619321.0 | NaN |
| 147 | Puerto Rico* | PUR | 3474182.0 | NaN |
| 156 | San Marino | SMR | 31781.0 | NaN |
| 176 | Syria | SYR | 18502413.0 | NaN |
| 177 | Taiwan | TPE | NaN | NaN |
| 195 | Venezuela | VEN | 31108083.0 | NaN |
| 197 | Virgin Islands* | ISV | 103574.0 | NaN |
pd.concat([sdata, wdata], axis = 0, keys = ["Summer", "Winter"],
names = ["Edition"]).reset_index()
| Edition | level_1 | Year | City | Sport | Discipline | Athlete | Country | Gender | Event | Medal | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Summer | 0 | 1896 | Athens | Aquatics | Swimming | HAJOS, Alfred | HUN | Men | 100M Freestyle | Gold |
| 1 | Summer | 1 | 1896 | Athens | Aquatics | Swimming | HERSCHMANN, Otto | AUT | Men | 100M Freestyle | Silver |
| 2 | Summer | 2 | 1896 | Athens | Aquatics | Swimming | DRIVAS, Dimitrios | GRE | Men | 100M Freestyle For Sailors | Bronze |
| 3 | Summer | 3 | 1896 | Athens | Aquatics | Swimming | MALOKINIS, Ioannis | GRE | Men | 100M Freestyle For Sailors | Gold |
| 4 | Summer | 4 | 1896 | Athens | Aquatics | Swimming | CHASAPIS, Spiridon | GRE | Men | 100M Freestyle For Sailors | Silver |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 36930 | Winter | 5765 | 2014 | Sochi | Skiing | Snowboard | JONES, Jenny | GBR | Women | Slopestyle | Bronze |
| 36931 | Winter | 5766 | 2014 | Sochi | Skiing | Snowboard | ANDERSON, Jamie | USA | Women | Slopestyle | Gold |
| 36932 | Winter | 5767 | 2014 | Sochi | Skiing | Snowboard | MALTAIS, Dominique | CAN | Women | Snowboard Cross | Silver |
| 36933 | Winter | 5768 | 2014 | Sochi | Skiing | Snowboard | SAMKOVA, Eva | CZE | Women | Snowboard Cross | Gold |
| 36934 | Winter | 5769 | 2014 | Sochi | Skiing | Snowboard | TRESPEUCH, Chloe | FRA | Women | Snowboard Cross | Bronze |
36935 rows × 11 columns
pd.concat([sdata, wdata], axis = 0, keys = ["Summer", "Winter"],
names = ["Edition"]).reset_index().drop(columns = "level_1")
| Edition | Year | City | Sport | Discipline | Athlete | Country | Gender | Event | Medal | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Summer | 1896 | Athens | Aquatics | Swimming | HAJOS, Alfred | HUN | Men | 100M Freestyle | Gold |
| 1 | Summer | 1896 | Athens | Aquatics | Swimming | HERSCHMANN, Otto | AUT | Men | 100M Freestyle | Silver |
| 2 | Summer | 1896 | Athens | Aquatics | Swimming | DRIVAS, Dimitrios | GRE | Men | 100M Freestyle For Sailors | Bronze |
| 3 | Summer | 1896 | Athens | Aquatics | Swimming | MALOKINIS, Ioannis | GRE | Men | 100M Freestyle For Sailors | Gold |
| 4 | Summer | 1896 | Athens | Aquatics | Swimming | CHASAPIS, Spiridon | GRE | Men | 100M Freestyle For Sailors | Silver |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 36930 | Winter | 2014 | Sochi | Skiing | Snowboard | JONES, Jenny | GBR | Women | Slopestyle | Bronze |
| 36931 | Winter | 2014 | Sochi | Skiing | Snowboard | ANDERSON, Jamie | USA | Women | Slopestyle | Gold |
| 36932 | Winter | 2014 | Sochi | Skiing | Snowboard | MALTAIS, Dominique | CAN | Women | Snowboard Cross | Silver |
| 36933 | Winter | 2014 | Sochi | Skiing | Snowboard | SAMKOVA, Eva | CZE | Women | Snowboard Cross | Gold |
| 36934 | Winter | 2014 | Sochi | Skiing | Snowboard | TRESPEUCH, Chloe | FRA | Women | Snowboard Cross | Bronze |
36935 rows × 10 columns
olympics = pd.concat([sdata, wdata], axis = 0, keys = ["Summer", "Winter"],
names = ["Edition"]).reset_index().drop(columns = "level_1")
olympics
| Edition | Year | City | Sport | Discipline | Athlete | Country | Gender | Event | Medal | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Summer | 1896 | Athens | Aquatics | Swimming | HAJOS, Alfred | HUN | Men | 100M Freestyle | Gold |
| 1 | Summer | 1896 | Athens | Aquatics | Swimming | HERSCHMANN, Otto | AUT | Men | 100M Freestyle | Silver |
| 2 | Summer | 1896 | Athens | Aquatics | Swimming | DRIVAS, Dimitrios | GRE | Men | 100M Freestyle For Sailors | Bronze |
| 3 | Summer | 1896 | Athens | Aquatics | Swimming | MALOKINIS, Ioannis | GRE | Men | 100M Freestyle For Sailors | Gold |
| 4 | Summer | 1896 | Athens | Aquatics | Swimming | CHASAPIS, Spiridon | GRE | Men | 100M Freestyle For Sailors | Silver |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 36930 | Winter | 2014 | Sochi | Skiing | Snowboard | JONES, Jenny | GBR | Women | Slopestyle | Bronze |
| 36931 | Winter | 2014 | Sochi | Skiing | Snowboard | ANDERSON, Jamie | USA | Women | Slopestyle | Gold |
| 36932 | Winter | 2014 | Sochi | Skiing | Snowboard | MALTAIS, Dominique | CAN | Women | Snowboard Cross | Silver |
| 36933 | Winter | 2014 | Sochi | Skiing | Snowboard | SAMKOVA, Eva | CZE | Women | Snowboard Cross | Gold |
| 36934 | Winter | 2014 | Sochi | Skiing | Snowboard | TRESPEUCH, Chloe | FRA | Women | Snowboard Cross | Bronze |
36935 rows × 10 columns
dataDic.head()
| Country | Code | Population | GDP per Capita | |
|---|---|---|---|---|
| 0 | Afghanistan | AFG | 32526562.0 | 594.323081 |
| 1 | Albania | ALB | 2889167.0 | 3945.217582 |
| 2 | Algeria | ALG | 39666519.0 | 4206.031232 |
| 3 | American Samoa* | ASA | 55538.0 | NaN |
| 4 | Andorra | AND | 70473.0 | NaN |
olympics.merge(dataDic.iloc[:, :2], how = "left",
left_on = "Country", right_on = "Code").drop(columns = ["Code"])
| Edition | Year | City | Sport | Discipline | Athlete | Country_x | Gender | Event | Medal | Country_y | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Summer | 1896 | Athens | Aquatics | Swimming | HAJOS, Alfred | HUN | Men | 100M Freestyle | Gold | Hungary |
| 1 | Summer | 1896 | Athens | Aquatics | Swimming | HERSCHMANN, Otto | AUT | Men | 100M Freestyle | Silver | Austria |
| 2 | Summer | 1896 | Athens | Aquatics | Swimming | DRIVAS, Dimitrios | GRE | Men | 100M Freestyle For Sailors | Bronze | Greece |
| 3 | Summer | 1896 | Athens | Aquatics | Swimming | MALOKINIS, Ioannis | GRE | Men | 100M Freestyle For Sailors | Gold | Greece |
| 4 | Summer | 1896 | Athens | Aquatics | Swimming | CHASAPIS, Spiridon | GRE | Men | 100M Freestyle For Sailors | Silver | Greece |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 36930 | Winter | 2014 | Sochi | Skiing | Snowboard | JONES, Jenny | GBR | Women | Slopestyle | Bronze | United Kingdom |
| 36931 | Winter | 2014 | Sochi | Skiing | Snowboard | ANDERSON, Jamie | USA | Women | Slopestyle | Gold | United States |
| 36932 | Winter | 2014 | Sochi | Skiing | Snowboard | MALTAIS, Dominique | CAN | Women | Snowboard Cross | Silver | Canada |
| 36933 | Winter | 2014 | Sochi | Skiing | Snowboard | SAMKOVA, Eva | CZE | Women | Snowboard Cross | Gold | Czech Republic |
| 36934 | Winter | 2014 | Sochi | Skiing | Snowboard | TRESPEUCH, Chloe | FRA | Women | Snowboard Cross | Bronze | France |
36935 rows × 11 columns
olympics = olympics.merge(dataDic.iloc[:, :2], how = "left",
left_on = "Country", right_on = "Code").drop(columns = ["Code"])
olympics.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 36935 entries, 0 to 36934 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Edition 36935 non-null object 1 Year 36935 non-null int64 2 City 36935 non-null object 3 Sport 36935 non-null object 4 Discipline 36935 non-null object 5 Athlete 36935 non-null object 6 Country_x 36931 non-null object 7 Gender 36935 non-null object 8 Event 36935 non-null object 9 Medal 36935 non-null object 10 Country_y 30568 non-null object dtypes: int64(1), object(10) memory usage: 3.4+ MB
olympics.reset_index(drop = True, inplace = True)
olympics
| Edition | Year | City | Sport | Discipline | Athlete | Country_x | Gender | Event | Medal | Country_y | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Summer | 1896 | Athens | Aquatics | Swimming | HAJOS, Alfred | HUN | Men | 100M Freestyle | Gold | Hungary |
| 1 | Summer | 1896 | Athens | Aquatics | Swimming | HERSCHMANN, Otto | AUT | Men | 100M Freestyle | Silver | Austria |
| 2 | Summer | 1896 | Athens | Aquatics | Swimming | DRIVAS, Dimitrios | GRE | Men | 100M Freestyle For Sailors | Bronze | Greece |
| 3 | Summer | 1896 | Athens | Aquatics | Swimming | MALOKINIS, Ioannis | GRE | Men | 100M Freestyle For Sailors | Gold | Greece |
| 4 | Summer | 1896 | Athens | Aquatics | Swimming | CHASAPIS, Spiridon | GRE | Men | 100M Freestyle For Sailors | Silver | Greece |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 36930 | Winter | 2014 | Sochi | Skiing | Snowboard | JONES, Jenny | GBR | Women | Slopestyle | Bronze | United Kingdom |
| 36931 | Winter | 2014 | Sochi | Skiing | Snowboard | ANDERSON, Jamie | USA | Women | Slopestyle | Gold | United States |
| 36932 | Winter | 2014 | Sochi | Skiing | Snowboard | MALTAIS, Dominique | CAN | Women | Snowboard Cross | Silver | Canada |
| 36933 | Winter | 2014 | Sochi | Skiing | Snowboard | SAMKOVA, Eva | CZE | Women | Snowboard Cross | Gold | Czech Republic |
| 36934 | Winter | 2014 | Sochi | Skiing | Snowboard | TRESPEUCH, Chloe | FRA | Women | Snowboard Cross | Bronze | France |
36935 rows × 11 columns
olympics
| Edition | Year | City | Sport | Discipline | Athlete | Country_x | Gender | Event | Medal | Country_y | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Summer | 1896 | Athens | Aquatics | Swimming | HAJOS, Alfred | HUN | Men | 100M Freestyle | Gold | Hungary |
| 1 | Summer | 1896 | Athens | Aquatics | Swimming | HERSCHMANN, Otto | AUT | Men | 100M Freestyle | Silver | Austria |
| 2 | Summer | 1896 | Athens | Aquatics | Swimming | DRIVAS, Dimitrios | GRE | Men | 100M Freestyle For Sailors | Bronze | Greece |
| 3 | Summer | 1896 | Athens | Aquatics | Swimming | MALOKINIS, Ioannis | GRE | Men | 100M Freestyle For Sailors | Gold | Greece |
| 4 | Summer | 1896 | Athens | Aquatics | Swimming | CHASAPIS, Spiridon | GRE | Men | 100M Freestyle For Sailors | Silver | Greece |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 36930 | Winter | 2014 | Sochi | Skiing | Snowboard | JONES, Jenny | GBR | Women | Slopestyle | Bronze | United Kingdom |
| 36931 | Winter | 2014 | Sochi | Skiing | Snowboard | ANDERSON, Jamie | USA | Women | Slopestyle | Gold | United States |
| 36932 | Winter | 2014 | Sochi | Skiing | Snowboard | MALTAIS, Dominique | CAN | Women | Snowboard Cross | Silver | Canada |
| 36933 | Winter | 2014 | Sochi | Skiing | Snowboard | SAMKOVA, Eva | CZE | Women | Snowboard Cross | Gold | Czech Republic |
| 36934 | Winter | 2014 | Sochi | Skiing | Snowboard | TRESPEUCH, Chloe | FRA | Women | Snowboard Cross | Bronze | France |
36935 rows × 11 columns
olympics.rename(columns = {"Country_x":"Code", "Country_y": "Country"}, inplace = True)
dataDic
| Country | Code | Population | GDP per Capita | |
|---|---|---|---|---|
| 0 | Afghanistan | AFG | 32526562.0 | 594.323081 |
| 1 | Albania | ALB | 2889167.0 | 3945.217582 |
| 2 | Algeria | ALG | 39666519.0 | 4206.031232 |
| 3 | American Samoa* | ASA | 55538.0 | NaN |
| 4 | Andorra | AND | 70473.0 | NaN |
| ... | ... | ... | ... | ... |
| 196 | Vietnam | VIE | 91703800.0 | 2111.138024 |
| 197 | Virgin Islands* | ISV | 103574.0 | NaN |
| 198 | Yemen | YEM | 26832215.0 | 1406.291651 |
| 199 | Zambia | ZAM | 16211767.0 | 1304.879014 |
| 200 | Zimbabwe | ZIM | 15602751.0 | 924.143819 |
201 rows × 4 columns
dataDic.rename(columns = {"GDP per Capita":"GDP"}, inplace= True)
olympics.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 36935 entries, 0 to 36934 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Edition 36935 non-null object 1 Year 36935 non-null int64 2 City 36935 non-null object 3 Sport 36935 non-null object 4 Discipline 36935 non-null object 5 Athlete 36935 non-null object 6 Code 36931 non-null object 7 Gender 36935 non-null object 8 Event 36935 non-null object 9 Medal 36935 non-null object 10 Country 30568 non-null object dtypes: int64(1), object(10) memory usage: 3.1+ MB
olympics.loc[olympics.Country.isnull()]
| Edition | Year | City | Sport | Discipline | Athlete | Code | Gender | Event | Medal | Country | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 132 | Summer | 1896 | Athens | Tennis | Tennis | FLACK, Edwin | ZZX | Men | Doubles | Bronze | NaN |
| 133 | Summer | 1896 | Athens | Tennis | Tennis | ROBERTSON, George Stuart | ZZX | Men | Doubles | Bronze | NaN |
| 134 | Summer | 1896 | Athens | Tennis | Tennis | BOLAND, John | ZZX | Men | Doubles | Gold | NaN |
| 135 | Summer | 1896 | Athens | Tennis | Tennis | TRAUN, Friedrich | ZZX | Men | Doubles | Gold | NaN |
| 136 | Summer | 1896 | Athens | Tennis | Tennis | KASDAGLIS, Dionysios | ZZX | Men | Doubles | Silver | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 33961 | Winter | 1992 | Albertville | Skiing | Freestyle Skiing | KOZHEVNIKOVA, Yelizaveta | EUN | Women | Moguls | Silver | NaN |
| 33977 | Winter | 1992 | Albertville | Skiing | Ski Jumping | GODER, Tomas | TCH | Men | K120 Team (90M) | Bronze | NaN |
| 33978 | Winter | 1992 | Albertville | Skiing | Ski Jumping | JEZ, Frantisek | TCH | Men | K120 Team (90M) | Bronze | NaN |
| 33979 | Winter | 1992 | Albertville | Skiing | Ski Jumping | PARMA, Jiri | TCH | Men | K120 Team (90M) | Bronze | NaN |
| 33980 | Winter | 1992 | Albertville | Skiing | Ski Jumping | SAKALA, Jaroslav | TCH | Men | K120 Team (90M) | Bronze | NaN |
6367 rows × 11 columns
missing_C_index = olympics.loc[olympics.Country.isnull()].index
missing_C_index
Int64Index([ 132, 133, 134, 135, 136, 137, 257, 258, 259,
260,
...
33939, 33947, 33949, 33953, 33954, 33961, 33977, 33978, 33979,
33980],
dtype='int64', length=6367)
olympics.loc[olympics.Country.isnull()].Code.value_counts()
URS 2489 GDR 987 ROU 642 FRG 584 TCH 487 YUG 442 EUN 283 EUA 281 ZZX 48 SRB 31 ANZ 29 RU1 17 MNE 14 TTO 10 BOH 7 BWI 5 SGP 4 IOP 3 Name: Code, dtype: int64
old_c = olympics.loc[olympics.Country.isnull(), :].Code.value_counts().index
old_c
Index(['URS', 'GDR', 'ROU', 'FRG', 'TCH', 'YUG', 'EUN', 'EUA', 'ZZX', 'SRB',
'ANZ', 'RU1', 'MNE', 'TTO', 'BOH', 'BWI', 'SGP', 'IOP'],
dtype='object')
mapper = pd.Series(index=old_c, name = "Country", data = ["Soviet Union", "East Germany", "Romania", "West Germany", "Czechoslovakia",
"Yugoslavia", "Unified Team", "Unified Team of Germany", "Mixed teams", "Serbia",
"Australasia", "Russian Empire", "Montenegro", "Trinidad and Tobago", "Bohemia",
"West Indies Federation", "Singapore", "Independent Olympic Participants"])
mapper
URS Soviet Union GDR East Germany ROU Romania FRG West Germany TCH Czechoslovakia YUG Yugoslavia EUN Unified Team EUA Unified Team of Germany ZZX Mixed teams SRB Serbia ANZ Australasia RU1 Russian Empire MNE Montenegro TTO Trinidad and Tobago BOH Bohemia BWI West Indies Federation SGP Singapore IOP Independent Olympic Participants Name: Country, dtype: object
olympics.loc[missing_C_index, "Code"].map(mapper)
132 Mixed teams
133 Mixed teams
134 Mixed teams
135 Mixed teams
136 Mixed teams
...
33961 Unified Team
33977 Czechoslovakia
33978 Czechoslovakia
33979 Czechoslovakia
33980 Czechoslovakia
Name: Code, Length: 6367, dtype: object
olympics.Country.fillna(olympics.Code.map(mapper), inplace = True)
olympics.loc[missing_C_index]
| Edition | Year | City | Sport | Discipline | Athlete | Code | Gender | Event | Medal | Country | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 132 | Summer | 1896 | Athens | Tennis | Tennis | FLACK, Edwin | ZZX | Men | Doubles | Bronze | Mixed teams |
| 133 | Summer | 1896 | Athens | Tennis | Tennis | ROBERTSON, George Stuart | ZZX | Men | Doubles | Bronze | Mixed teams |
| 134 | Summer | 1896 | Athens | Tennis | Tennis | BOLAND, John | ZZX | Men | Doubles | Gold | Mixed teams |
| 135 | Summer | 1896 | Athens | Tennis | Tennis | TRAUN, Friedrich | ZZX | Men | Doubles | Gold | Mixed teams |
| 136 | Summer | 1896 | Athens | Tennis | Tennis | KASDAGLIS, Dionysios | ZZX | Men | Doubles | Silver | Mixed teams |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 33961 | Winter | 1992 | Albertville | Skiing | Freestyle Skiing | KOZHEVNIKOVA, Yelizaveta | EUN | Women | Moguls | Silver | Unified Team |
| 33977 | Winter | 1992 | Albertville | Skiing | Ski Jumping | GODER, Tomas | TCH | Men | K120 Team (90M) | Bronze | Czechoslovakia |
| 33978 | Winter | 1992 | Albertville | Skiing | Ski Jumping | JEZ, Frantisek | TCH | Men | K120 Team (90M) | Bronze | Czechoslovakia |
| 33979 | Winter | 1992 | Albertville | Skiing | Ski Jumping | PARMA, Jiri | TCH | Men | K120 Team (90M) | Bronze | Czechoslovakia |
| 33980 | Winter | 1992 | Albertville | Skiing | Ski Jumping | SAKALA, Jaroslav | TCH | Men | K120 Team (90M) | Bronze | Czechoslovakia |
6367 rows × 11 columns
olympics.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 36935 entries, 0 to 36934 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Edition 36935 non-null object 1 Year 36935 non-null int64 2 City 36935 non-null object 3 Sport 36935 non-null object 4 Discipline 36935 non-null object 5 Athlete 36935 non-null object 6 Code 36931 non-null object 7 Gender 36935 non-null object 8 Event 36935 non-null object 9 Medal 36935 non-null object 10 Country 36931 non-null object dtypes: int64(1), object(10) memory usage: 3.1+ MB
olympics[olympics.Code.isna()]
| Edition | Year | City | Sport | Discipline | Athlete | Code | Gender | Event | Medal | Country | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 29603 | Summer | 2012 | London | Athletics | Athletics | Pending | NaN | Women | 1500M | Gold | NaN |
| 31072 | Summer | 2012 | London | Weightlifting | Weightlifting | Pending | NaN | Women | 63KG | Gold | NaN |
| 31091 | Summer | 2012 | London | Weightlifting | Weightlifting | Pending | NaN | Men | 94KG | Silver | NaN |
| 31110 | Summer | 2012 | London | Wrestling | Wrestling Freestyle | KUDUKHOV, Besik | NaN | Men | Wf 60 KG | Silver | NaN |
olympics.dropna(subset = ["Code"], inplace = True)
olympics.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 36931 entries, 0 to 36934 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Edition 36931 non-null object 1 Year 36931 non-null int64 2 City 36931 non-null object 3 Sport 36931 non-null object 4 Discipline 36931 non-null object 5 Athlete 36931 non-null object 6 Code 36931 non-null object 7 Gender 36931 non-null object 8 Event 36931 non-null object 9 Medal 36931 non-null object 10 Country 36931 non-null object dtypes: int64(1), object(10) memory usage: 3.4+ MB
olympics.reset_index(drop = True, inplace = True)
olympics.nunique()
Edition 2 Year 33 City 41 Sport 48 Discipline 80 Athlete 26494 Code 148 Gender 2 Event 733 Medal 3 Country 145 dtype: int64
olympics.Medal = olympics.Medal.astype("category")
olympics.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 36931 entries, 0 to 36930 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Edition 36931 non-null object 1 Year 36931 non-null int64 2 City 36931 non-null object 3 Sport 36931 non-null object 4 Discipline 36931 non-null object 5 Athlete 36931 non-null object 6 Code 36931 non-null object 7 Gender 36931 non-null object 8 Event 36931 non-null object 9 Medal 36931 non-null category 10 Country 36931 non-null object dtypes: category(1), int64(1), object(9) memory usage: 2.9+ MB
olympics.Medal.sort_values()
36930 Bronze
25565 Bronze
25562 Bronze
25559 Bronze
11084 Bronze
...
10811 Silver
10810 Silver
10809 Silver
25834 Silver
18465 Silver
Name: Medal, Length: 36931, dtype: category
Categories (3, object): ['Bronze', 'Gold', 'Silver']
olympics.Medal.cat.set_categories(["Bronze", "Silver", "Gold"], ordered = True, inplace = True)
C:\Users\Jayanth\AppData\Local\Temp\ipykernel_18964\1904861286.py:1: FutureWarning: The `inplace` parameter in pandas.Categorical.set_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object. olympics.Medal.cat.set_categories(["Bronze", "Silver", "Gold"], ordered = True, inplace = True)
olympics.Medal.sort_values()
36930 Bronze
8916 Bronze
15339 Bronze
27698 Bronze
8919 Bronze
...
25833 Gold
10807 Gold
10806 Gold
10804 Gold
0 Gold
Name: Medal, Length: 36931, dtype: category
Categories (3, object): ['Bronze' < 'Silver' < 'Gold']
For the next questions, use Seaborn plots.
import matplotlib.pyplot as plt
import seaborn as sns
olympics
| Edition | Year | City | Sport | Discipline | Athlete | Code | Gender | Event | Medal | Country | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Summer | 1896 | Athens | Aquatics | Swimming | HAJOS, Alfred | HUN | Men | 100M Freestyle | Gold | Hungary |
| 1 | Summer | 1896 | Athens | Aquatics | Swimming | HERSCHMANN, Otto | AUT | Men | 100M Freestyle | Silver | Austria |
| 2 | Summer | 1896 | Athens | Aquatics | Swimming | DRIVAS, Dimitrios | GRE | Men | 100M Freestyle For Sailors | Bronze | Greece |
| 3 | Summer | 1896 | Athens | Aquatics | Swimming | MALOKINIS, Ioannis | GRE | Men | 100M Freestyle For Sailors | Gold | Greece |
| 4 | Summer | 1896 | Athens | Aquatics | Swimming | CHASAPIS, Spiridon | GRE | Men | 100M Freestyle For Sailors | Silver | Greece |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 36926 | Winter | 2014 | Sochi | Skiing | Snowboard | JONES, Jenny | GBR | Women | Slopestyle | Bronze | United Kingdom |
| 36927 | Winter | 2014 | Sochi | Skiing | Snowboard | ANDERSON, Jamie | USA | Women | Slopestyle | Gold | United States |
| 36928 | Winter | 2014 | Sochi | Skiing | Snowboard | MALTAIS, Dominique | CAN | Women | Snowboard Cross | Silver | Canada |
| 36929 | Winter | 2014 | Sochi | Skiing | Snowboard | SAMKOVA, Eva | CZE | Women | Snowboard Cross | Gold | Czech Republic |
| 36930 | Winter | 2014 | Sochi | Skiing | Snowboard | TRESPEUCH, Chloe | FRA | Women | Snowboard Cross | Bronze | France |
36931 rows × 11 columns
olympics.Country.value_counts()
United States 5238
Soviet Union 2489
United Kingdom 1799
Germany 1665
France 1548
...
Virgin Islands* 1
Guyana 1
Netherlands Antilles* 1
Iraq 1
Bermuda* 1
Name: Country, Length: 145, dtype: int64
top_10 = olympics.Country.value_counts().head(10)
top_10
United States 5238 Soviet Union 2489 United Kingdom 1799 Germany 1665 France 1548 Italy 1488 Sweden 1477 Canada 1274 Australia 1204 Hungary 1091 Name: Country, dtype: int64
top_10.plot(kind = "bar", fontsize = 15, figsize=(12,8))
plt.title("Top 10 Countries by Medals", fontsize = 15)
plt.ylabel("Medals", fontsize = 14)
plt.show()
olympics_10 = olympics[olympics.Country.isin(top_10.index)]
olympics_10
| Edition | Year | City | Sport | Discipline | Athlete | Code | Gender | Event | Medal | Country | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Summer | 1896 | Athens | Aquatics | Swimming | HAJOS, Alfred | HUN | Men | 100M Freestyle | Gold | Hungary |
| 6 | Summer | 1896 | Athens | Aquatics | Swimming | HAJOS, Alfred | HUN | Men | 1200M Freestyle | Gold | Hungary |
| 11 | Summer | 1896 | Athens | Athletics | Athletics | LANE, Francis | USA | Men | 100M | Bronze | United States |
| 12 | Summer | 1896 | Athens | Athletics | Athletics | SZOKOLYI, Alajos | HUN | Men | 100M | Bronze | Hungary |
| 13 | Summer | 1896 | Athens | Athletics | Athletics | BURKE, Thomas | USA | Men | 100M | Gold | United States |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 36924 | Winter | 2014 | Sochi | Skiing | Snowboard | KOBER, Amelie | GER | Women | Parallel Slalom | Bronze | Germany |
| 36926 | Winter | 2014 | Sochi | Skiing | Snowboard | JONES, Jenny | GBR | Women | Slopestyle | Bronze | United Kingdom |
| 36927 | Winter | 2014 | Sochi | Skiing | Snowboard | ANDERSON, Jamie | USA | Women | Slopestyle | Gold | United States |
| 36928 | Winter | 2014 | Sochi | Skiing | Snowboard | MALTAIS, Dominique | CAN | Women | Snowboard Cross | Silver | Canada |
| 36930 | Winter | 2014 | Sochi | Skiing | Snowboard | TRESPEUCH, Chloe | FRA | Women | Snowboard Cross | Bronze | France |
19273 rows × 11 columns
plt.figure(figsize=(20,10))
sns.set(font_scale=1.5, palette= "dark")
sns.countplot(data = olympics_10, x = "Country", order = top_10.index)
plt.title("Top 10 Countries by Medals", fontsize = 20)
plt.show()
plt.figure(figsize=(20,10))
sns.set(font_scale=1.5, palette= "dark")
sns.countplot(data = olympics_10, x = "Country", hue = "Edition", order = top_10.index)
plt.title("Top 10 Countries by Medals", fontsize = 20)
plt.show()
plt.figure(figsize=(20,10))
sns.set(font_scale=1.5, palette= "dark")
sns.countplot(data = olympics_10, x = "Edition", hue = "Country", hue_order = top_10.index)
plt.title("Top 10 Countries by Medals", fontsize = 20)
plt.show()
plt.figure(figsize=(20,10))
sns.set(font_scale=1.5, palette= "dark")
sns.countplot(data = olympics_10, x = "Country", hue = "Medal", order = top_10.index,
hue_order = ["Gold", "Silver", "Bronze"], palette = ["gold", "silver", "brown"])
plt.title("Top 10 Countries by Medals", fontsize = 20)
plt.show()
plt.figure(figsize=(20,10))
sns.set(font_scale=1.5, palette= "dark")
sns.countplot(data = olympics_10, x = "Medal", hue = "Country",
order = ["Gold", "Silver", "Bronze"], hue_order= top_10.index)
plt.title("Top 10 Countries by Medals", fontsize = 20)
plt.show()
olympics
| Edition | Year | City | Sport | Discipline | Athlete | Code | Gender | Event | Medal | Country | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Summer | 1896 | Athens | Aquatics | Swimming | HAJOS, Alfred | HUN | Men | 100M Freestyle | Gold | Hungary |
| 1 | Summer | 1896 | Athens | Aquatics | Swimming | HERSCHMANN, Otto | AUT | Men | 100M Freestyle | Silver | Austria |
| 2 | Summer | 1896 | Athens | Aquatics | Swimming | DRIVAS, Dimitrios | GRE | Men | 100M Freestyle For Sailors | Bronze | Greece |
| 3 | Summer | 1896 | Athens | Aquatics | Swimming | MALOKINIS, Ioannis | GRE | Men | 100M Freestyle For Sailors | Gold | Greece |
| 4 | Summer | 1896 | Athens | Aquatics | Swimming | CHASAPIS, Spiridon | GRE | Men | 100M Freestyle For Sailors | Silver | Greece |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 36926 | Winter | 2014 | Sochi | Skiing | Snowboard | JONES, Jenny | GBR | Women | Slopestyle | Bronze | United Kingdom |
| 36927 | Winter | 2014 | Sochi | Skiing | Snowboard | ANDERSON, Jamie | USA | Women | Slopestyle | Gold | United States |
| 36928 | Winter | 2014 | Sochi | Skiing | Snowboard | MALTAIS, Dominique | CAN | Women | Snowboard Cross | Silver | Canada |
| 36929 | Winter | 2014 | Sochi | Skiing | Snowboard | SAMKOVA, Eva | CZE | Women | Snowboard Cross | Gold | Czech Republic |
| 36930 | Winter | 2014 | Sochi | Skiing | Snowboard | TRESPEUCH, Chloe | FRA | Women | Snowboard Cross | Bronze | France |
36931 rows × 11 columns
medals_per_country = pd.crosstab(index = olympics.Country, columns = olympics.Medal,
margins = True, margins_name= "Total")
medals_per_country
| Medal | Bronze | Silver | Gold | Total |
|---|---|---|---|---|
| Country | ||||
| Afghanistan | 2 | 0 | 0 | 2 |
| Algeria | 8 | 2 | 5 | 15 |
| Argentina | 91 | 99 | 69 | 259 |
| Armenia | 8 | 2 | 1 | 11 |
| Australasia | 5 | 4 | 20 | 29 |
| ... | ... | ... | ... | ... |
| West Indies Federation | 5 | 0 | 0 | 5 |
| Yugoslavia | 119 | 180 | 143 | 442 |
| Zambia | 1 | 1 | 0 | 2 |
| Zimbabwe | 1 | 4 | 18 | 23 |
| Total | 12288 | 12238 | 12405 | 36931 |
146 rows × 4 columns
medals_per_country.drop("Total", axis = 0, inplace = True)
medals_per_country
| Medal | Bronze | Silver | Gold | Total |
|---|---|---|---|---|
| Country | ||||
| Afghanistan | 2 | 0 | 0 | 2 |
| Algeria | 8 | 2 | 5 | 15 |
| Argentina | 91 | 99 | 69 | 259 |
| Armenia | 8 | 2 | 1 | 11 |
| Australasia | 5 | 4 | 20 | 29 |
| ... | ... | ... | ... | ... |
| West Germany | 227 | 191 | 166 | 584 |
| West Indies Federation | 5 | 0 | 0 | 5 |
| Yugoslavia | 119 | 180 | 143 | 442 |
| Zambia | 1 | 1 | 0 | 2 |
| Zimbabwe | 1 | 4 | 18 | 23 |
145 rows × 4 columns
medals_per_country = medals_per_country[["Total", "Gold", "Silver", "Bronze"]].sort_values(by = ["Total", "Gold", "Silver", "Bronze"], ascending = False)
medals_per_country.head(50)
| Medal | Total | Gold | Silver | Bronze |
|---|---|---|---|---|
| Country | ||||
| United States | 5238 | 2402 | 1571 | 1265 |
| Soviet Union | 2489 | 1088 | 724 | 677 |
| United Kingdom | 1799 | 580 | 632 | 587 |
| Germany | 1665 | 589 | 504 | 572 |
| France | 1548 | 444 | 526 | 578 |
| Italy | 1488 | 534 | 473 | 481 |
| Sweden | 1477 | 476 | 496 | 505 |
| Canada | 1274 | 470 | 435 | 369 |
| Australia | 1204 | 317 | 408 | 479 |
| Hungary | 1091 | 412 | 320 | 359 |
| Russia | 1031 | 333 | 328 | 370 |
| Norway | 1011 | 368 | 371 | 272 |
| East Germany | 987 | 387 | 321 | 279 |
| Netherlands | 973 | 275 | 317 | 381 |
| Finland | 890 | 190 | 267 | 433 |
| China | 889 | 306 | 326 | 257 |
| Japan | 851 | 230 | 294 | 327 |
| Switzerland | 665 | 151 | 244 | 270 |
| Romania | 642 | 157 | 195 | 290 |
| Korea, South | 616 | 209 | 230 | 177 |
| West Germany | 584 | 166 | 191 | 227 |
| Poland | 538 | 112 | 184 | 242 |
| Denmark | 512 | 150 | 202 | 160 |
| Czechoslovakia | 487 | 82 | 224 | 181 |
| Spain | 444 | 99 | 227 | 118 |
| Yugoslavia | 442 | 143 | 180 | 119 |
| Brazil | 431 | 73 | 173 | 185 |
| Austria | 426 | 100 | 179 | 147 |
| Belgium | 424 | 93 | 172 | 159 |
| Cuba | 410 | 165 | 129 | 116 |
| Bulgaria | 339 | 54 | 145 | 140 |
| Unified Team | 283 | 128 | 75 | 80 |
| Unified Team of Germany | 281 | 76 | 108 | 97 |
| Argentina | 259 | 69 | 99 | 91 |
| New Zealand | 191 | 85 | 31 | 75 |
| India | 184 | 128 | 18 | 38 |
| Ukraine | 184 | 46 | 43 | 95 |
| Greece | 148 | 34 | 62 | 52 |
| Czech Republic | 131 | 42 | 34 | 55 |
| Belarus | 128 | 23 | 41 | 64 |
| Jamaica | 127 | 31 | 49 | 47 |
| Croatia | 125 | 50 | 40 | 35 |
| Pakistan | 121 | 42 | 45 | 34 |
| Mexico | 106 | 32 | 23 | 51 |
| South Africa | 106 | 30 | 39 | 37 |
| Kenya | 93 | 28 | 35 | 30 |
| Turkey | 86 | 38 | 25 | 23 |
| Nigeria | 84 | 19 | 38 | 27 |
| Uruguay | 76 | 44 | 2 | 30 |
| Iran | 61 | 16 | 21 | 24 |
dataDic
| Country | Code | Population | GDP | |
|---|---|---|---|---|
| 0 | Afghanistan | AFG | 32526562.0 | 594.323081 |
| 1 | Albania | ALB | 2889167.0 | 3945.217582 |
| 2 | Algeria | ALG | 39666519.0 | 4206.031232 |
| 3 | American Samoa* | ASA | 55538.0 | NaN |
| 4 | Andorra | AND | 70473.0 | NaN |
| ... | ... | ... | ... | ... |
| 196 | Vietnam | VIE | 91703800.0 | 2111.138024 |
| 197 | Virgin Islands* | ISV | 103574.0 | NaN |
| 198 | Yemen | YEM | 26832215.0 | 1406.291651 |
| 199 | Zambia | ZAM | 16211767.0 | 1304.879014 |
| 200 | Zimbabwe | ZIM | 15602751.0 | 924.143819 |
201 rows × 4 columns
medals_per_country = medals_per_country.merge(dataDic, how = "left", left_index = True,
right_on = "Country").drop(columns = ["Code"]).set_index("Country")
medals_per_country
| Total | Gold | Silver | Bronze | Population | GDP | |
|---|---|---|---|---|---|---|
| Country | ||||||
| United States | 5238 | 2402 | 1571 | 1265 | 321418820.0 | 56115.718426 |
| Soviet Union | 2489 | 1088 | 724 | 677 | NaN | NaN |
| United Kingdom | 1799 | 580 | 632 | 587 | 65138232.0 | 43875.969614 |
| Germany | 1665 | 589 | 504 | 572 | 81413145.0 | 41313.313995 |
| France | 1548 | 444 | 526 | 578 | 66808385.0 | 36205.568102 |
| ... | ... | ... | ... | ... | ... | ... |
| Iraq | 1 | 0 | 0 | 1 | 36423395.0 | 4943.760388 |
| Macedonia | 1 | 0 | 0 | 1 | 2078453.0 | 4852.657848 |
| Mauritius | 1 | 0 | 0 | 1 | 1262605.0 | 9252.110724 |
| Niger | 1 | 0 | 0 | 1 | 19899120.0 | 358.958152 |
| Togo | 1 | 0 | 0 | 1 | 7304578.0 | 559.635877 |
145 rows × 6 columns
olympics.nunique()
Edition 2 Year 33 City 41 Sport 48 Discipline 80 Athlete 26494 Code 148 Gender 2 Event 733 Medal 3 Country 145 dtype: int64
y = lambda x : x **2
y
<function __main__.<lambda>(x)>
list_x = [2,3,4,5,6,7,8]
z = map(y,list_x)
print('---',list(z))
w = map(lambda x : x **2,list_x) # One function to another function - higher order function (HOF)
print('---',list(w))
--- [4, 9, 16, 25, 36, 49, 64] --- [4, 9, 16, 25, 36, 49, 64]
olympics["Games"] = olympics.apply(lambda x: str(x.Year) + " " + x.City, axis = 1)
olympics
| Edition | Year | City | Sport | Discipline | Athlete | Code | Gender | Event | Medal | Country | Games | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Summer | 1896 | Athens | Aquatics | Swimming | HAJOS, Alfred | HUN | Men | 100M Freestyle | Gold | Hungary | 1896 Athens |
| 1 | Summer | 1896 | Athens | Aquatics | Swimming | HERSCHMANN, Otto | AUT | Men | 100M Freestyle | Silver | Austria | 1896 Athens |
| 2 | Summer | 1896 | Athens | Aquatics | Swimming | DRIVAS, Dimitrios | GRE | Men | 100M Freestyle For Sailors | Bronze | Greece | 1896 Athens |
| 3 | Summer | 1896 | Athens | Aquatics | Swimming | MALOKINIS, Ioannis | GRE | Men | 100M Freestyle For Sailors | Gold | Greece | 1896 Athens |
| 4 | Summer | 1896 | Athens | Aquatics | Swimming | CHASAPIS, Spiridon | GRE | Men | 100M Freestyle For Sailors | Silver | Greece | 1896 Athens |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 36926 | Winter | 2014 | Sochi | Skiing | Snowboard | JONES, Jenny | GBR | Women | Slopestyle | Bronze | United Kingdom | 2014 Sochi |
| 36927 | Winter | 2014 | Sochi | Skiing | Snowboard | ANDERSON, Jamie | USA | Women | Slopestyle | Gold | United States | 2014 Sochi |
| 36928 | Winter | 2014 | Sochi | Skiing | Snowboard | MALTAIS, Dominique | CAN | Women | Snowboard Cross | Silver | Canada | 2014 Sochi |
| 36929 | Winter | 2014 | Sochi | Skiing | Snowboard | SAMKOVA, Eva | CZE | Women | Snowboard Cross | Gold | Czech Republic | 2014 Sochi |
| 36930 | Winter | 2014 | Sochi | Skiing | Snowboard | TRESPEUCH, Chloe | FRA | Women | Snowboard Cross | Bronze | France | 2014 Sochi |
36931 rows × 12 columns
olympics.Games.nunique()
49
olympics.Games.value_counts()
2008 Beijing 2042 2000 Sydney 2015 2004 Athens 1998 2012 London 1945 1996 Atlanta 1859 1992 Barcelona 1705 1988 Seoul 1546 1984 Los Angeles 1459 1980 Moscow 1387 1976 Montreal 1305 1920 Antwerp 1298 1972 Munich 1185 1968 Mexico 1031 1964 Tokyo 1010 1952 Helsinki 889 1956 Melbourne / Stockholm 885 1912 Stockholm 885 1924 Paris 884 1960 Rome 882 1936 Berlin 875 1948 London 814 1908 London 804 1928 Amsterdam 710 1932 Los Angeles 615 2014 Sochi 612 2006 Turin 531 2010 Vancouver 529 1900 Paris 512 2002 Salt Lake City 481 1904 St Louis 470 1998 Nagano 447 1994 Lillehammer 343 1992 Albertville 325 1988 Calgary 264 1984 Sarajevo 222 1980 Lake Placid 218 1976 Innsbruck 210 1972 Sapporo 200 1968 Grenoble 199 1964 Innsbruck 185 1896 Athens 151 1956 Cortina d'Ampezzo 150 1960 Squaw Valley 147 1948 St.Moritz 140 1952 Oslo 136 1924 Chamonix 118 1932 Lake Placid 116 1936 Garmisch Partenkirchen 108 1928 St.Moritz 89 Name: Games, dtype: int64
olympics.groupby("Country").apply(lambda x: x.Games.nunique())
Country
Afghanistan 2
Algeria 6
Argentina 18
Armenia 4
Australasia 2
..
West Germany 12
West Indies Federation 1
Yugoslavia 18
Zambia 2
Zimbabwe 3
Length: 145, dtype: int64
medals_per_country["Total_Games"] = olympics.groupby("Country").apply(lambda x: x.Games.nunique())
medals_per_country.head(50)
| Total | Gold | Silver | Bronze | Population | GDP | Total_Games | |
|---|---|---|---|---|---|---|---|
| Country | |||||||
| United States | 5238 | 2402 | 1571 | 1265 | 3.214188e+08 | 56115.718426 | 48 |
| Soviet Union | 2489 | 1088 | 724 | 677 | NaN | NaN | 18 |
| United Kingdom | 1799 | 580 | 632 | 587 | 6.513823e+07 | 43875.969614 | 42 |
| Germany | 1665 | 589 | 504 | 572 | 8.141314e+07 | 41313.313995 | 25 |
| France | 1548 | 444 | 526 | 578 | 6.680838e+07 | 36205.568102 | 47 |
| Italy | 1488 | 534 | 473 | 481 | 6.080208e+07 | 29957.804315 | 43 |
| Sweden | 1477 | 476 | 496 | 505 | 9.798871e+06 | 50579.673649 | 47 |
| Canada | 1274 | 470 | 435 | 369 | 3.585177e+07 | 43248.529909 | 47 |
| Australia | 1204 | 317 | 408 | 479 | 2.378117e+07 | 56310.962993 | 30 |
| Hungary | 1091 | 412 | 320 | 359 | 9.844686e+06 | 12363.543460 | 31 |
| Russia | 1031 | 333 | 328 | 370 | 1.440968e+08 | 9092.580536 | 11 |
| Norway | 1011 | 368 | 371 | 272 | 5.195921e+06 | 74400.369777 | 44 |
| East Germany | 987 | 387 | 321 | 279 | NaN | NaN | 11 |
| Netherlands | 973 | 275 | 317 | 381 | 1.693652e+07 | 44299.768085 | 39 |
| Finland | 890 | 190 | 267 | 433 | 5.482013e+06 | 42311.036231 | 46 |
| China | 889 | 306 | 326 | 257 | 1.371220e+09 | 8027.683810 | 15 |
| Japan | 851 | 230 | 294 | 327 | 1.269585e+08 | 32477.215145 | 32 |
| Switzerland | 665 | 151 | 244 | 270 | 8.286976e+06 | 80945.079219 | 46 |
| Romania | 642 | 157 | 195 | 290 | 1.983239e+07 | 8972.922518 | 19 |
| Korea, South | 616 | 209 | 230 | 177 | 5.061704e+07 | 27221.524051 | 22 |
| West Germany | 584 | 166 | 191 | 227 | NaN | NaN | 12 |
| Poland | 538 | 112 | 184 | 242 | 3.799949e+07 | 12554.547554 | 27 |
| Denmark | 512 | 150 | 202 | 160 | 5.676002e+06 | 51989.293471 | 27 |
| Czechoslovakia | 487 | 82 | 224 | 181 | NaN | NaN | 27 |
| Spain | 444 | 99 | 227 | 118 | 4.641827e+07 | 25831.582305 | 20 |
| Yugoslavia | 442 | 143 | 180 | 119 | NaN | NaN | 18 |
| Brazil | 431 | 73 | 173 | 185 | 2.078475e+08 | 8538.589975 | 18 |
| Austria | 426 | 100 | 179 | 147 | 8.611088e+06 | 43774.985174 | 46 |
| Belgium | 424 | 93 | 172 | 159 | 1.128572e+07 | 40324.027766 | 28 |
| Cuba | 410 | 165 | 129 | 116 | 1.138956e+07 | NaN | 14 |
| Bulgaria | 339 | 54 | 145 | 140 | 7.177991e+06 | 6993.477360 | 19 |
| Unified Team | 283 | 128 | 75 | 80 | NaN | NaN | 2 |
| Unified Team of Germany | 281 | 76 | 108 | 97 | NaN | NaN | 6 |
| Argentina | 259 | 69 | 99 | 91 | 4.341676e+07 | 13431.878340 | 18 |
| New Zealand | 191 | 85 | 31 | 75 | 4.595700e+06 | 37807.967276 | 21 |
| India | 184 | 128 | 18 | 38 | 1.311051e+09 | 1598.259034 | 17 |
| Ukraine | 184 | 46 | 43 | 95 | 4.519820e+07 | 2114.954716 | 9 |
| Greece | 148 | 34 | 62 | 52 | 1.082373e+07 | 18002.230578 | 18 |
| Czech Republic | 131 | 42 | 34 | 55 | 1.055122e+07 | 17548.338213 | 10 |
| Belarus | 128 | 23 | 41 | 64 | 9.513000e+06 | 5740.456495 | 11 |
| Jamaica | 127 | 31 | 49 | 47 | 2.725941e+06 | 5232.024583 | 14 |
| Croatia | 125 | 50 | 40 | 35 | 4.224404e+06 | 11535.829356 | 10 |
| Pakistan | 121 | 42 | 45 | 34 | 1.889249e+08 | 1434.696665 | 9 |
| Mexico | 106 | 32 | 23 | 51 | 1.270172e+08 | 9005.024265 | 19 |
| South Africa | 106 | 30 | 39 | 37 | 5.495692e+07 | 5723.973357 | 17 |
| Kenya | 93 | 28 | 35 | 30 | 4.605030e+07 | 1376.712829 | 11 |
| Turkey | 86 | 38 | 25 | 23 | 7.866583e+07 | 9125.687590 | 16 |
| Nigeria | 84 | 19 | 38 | 27 | 1.822020e+08 | 2640.290739 | 8 |
| Uruguay | 76 | 44 | 2 | 30 | 3.431555e+06 | 15573.900919 | 8 |
| Iran | 61 | 16 | 21 | 24 | 7.910927e+07 | NaN | 15 |
medals_per_country.rank(ascending = False).head(50)
| Total | Gold | Silver | Bronze | Population | GDP | Total_Games | |
|---|---|---|---|---|---|---|---|
| Country | |||||||
| United States | 1.0 | 1.0 | 1.0 | 1.0 | 3.0 | 7.0 | 1.0 |
| Soviet Union | 2.0 | 2.0 | 2.0 | 2.0 | NaN | NaN | 28.0 |
| United Kingdom | 3.0 | 4.0 | 3.0 | 3.0 | 20.0 | 13.0 | 10.0 |
| Germany | 4.0 | 3.0 | 5.0 | 5.0 | 15.0 | 18.0 | 19.0 |
| France | 5.0 | 8.0 | 4.0 | 4.0 | 19.0 | 22.0 | 3.0 |
| Italy | 6.0 | 5.0 | 7.0 | 7.0 | 21.0 | 25.0 | 9.0 |
| Sweden | 7.0 | 6.0 | 6.0 | 6.0 | 71.0 | 10.0 | 3.0 |
| Canada | 8.0 | 7.0 | 8.0 | 12.0 | 35.0 | 15.0 | 3.0 |
| Australia | 9.0 | 13.0 | 9.0 | 8.0 | 46.0 | 6.0 | 14.0 |
| Hungary | 10.0 | 9.0 | 14.0 | 13.0 | 70.0 | 48.0 | 13.0 |
| Russia | 11.0 | 12.0 | 11.0 | 11.0 | 8.0 | 57.0 | 45.0 |
| Norway | 12.0 | 11.0 | 10.0 | 17.0 | 90.0 | 3.0 | 8.0 |
| East Germany | 13.0 | 10.0 | 13.0 | 16.0 | NaN | NaN | 45.0 |
| Netherlands | 14.0 | 15.0 | 15.0 | 10.0 | 55.0 | 12.0 | 11.0 |
| Finland | 15.0 | 18.0 | 17.0 | 9.0 | 88.0 | 17.0 | 6.0 |
| China | 16.0 | 14.0 | 12.0 | 19.0 | 1.0 | 63.0 | 34.5 |
| Japan | 17.0 | 16.0 | 16.0 | 14.0 | 10.0 | 24.0 | 12.0 |
| Switzerland | 18.0 | 22.0 | 18.0 | 18.0 | 78.0 | 2.0 | 6.0 |
| Romania | 19.0 | 21.0 | 23.0 | 15.0 | 51.0 | 59.0 | 24.0 |
| Korea, South | 20.0 | 17.0 | 19.0 | 24.0 | 24.0 | 27.0 | 20.0 |
| West Germany | 21.0 | 19.0 | 24.0 | 21.0 | NaN | NaN | 40.0 |
| Poland | 22.0 | 27.0 | 25.0 | 20.0 | 33.0 | 47.0 | 17.0 |
| Denmark | 23.0 | 23.0 | 22.0 | 25.0 | 86.0 | 9.0 | 17.0 |
| Czechoslovakia | 24.0 | 32.0 | 21.0 | 23.0 | NaN | NaN | 17.0 |
| Spain | 25.0 | 29.0 | 20.0 | 30.0 | 26.0 | 28.0 | 22.0 |
| Yugoslavia | 26.0 | 24.0 | 26.0 | 29.0 | NaN | NaN | 28.0 |
| Brazil | 27.0 | 34.0 | 28.0 | 22.0 | 5.0 | 60.0 | 28.0 |
| Austria | 28.0 | 28.0 | 27.0 | 27.0 | 75.0 | 14.0 | 6.0 |
| Belgium | 29.0 | 30.0 | 29.0 | 26.0 | 62.0 | 20.0 | 15.0 |
| Cuba | 30.0 | 20.0 | 31.0 | 31.0 | 61.0 | NaN | 36.5 |
| Bulgaria | 31.0 | 36.0 | 30.0 | 28.0 | 81.0 | 64.0 | 24.0 |
| Unified Team | 32.0 | 25.5 | 34.0 | 35.0 | NaN | NaN | 107.5 |
| Unified Team of Germany | 33.0 | 33.0 | 32.0 | 32.0 | NaN | NaN | 73.0 |
| Argentina | 34.0 | 35.0 | 33.0 | 34.0 | 29.0 | 44.0 | 28.0 |
| New Zealand | 35.0 | 31.0 | 45.0 | 36.0 | 93.0 | 21.0 | 21.0 |
| India | 36.5 | 25.5 | 52.0 | 43.0 | 2.0 | 100.0 | 31.5 |
| Ukraine | 36.5 | 38.0 | 38.0 | 33.0 | 28.0 | 96.0 | 57.0 |
| Greece | 38.0 | 43.0 | 35.0 | 39.0 | 65.0 | 35.0 | 28.0 |
| Czech Republic | 39.0 | 40.5 | 44.0 | 38.0 | 67.0 | 36.0 | 50.5 |
| Belarus | 40.0 | 48.5 | 39.0 | 37.0 | 73.0 | 71.0 | 45.0 |
| Jamaica | 41.0 | 45.0 | 36.0 | 41.0 | 104.0 | 75.0 | 36.5 |
| Croatia | 42.0 | 37.0 | 40.0 | 45.0 | 94.0 | 49.0 | 50.5 |
| Pakistan | 43.0 | 40.5 | 37.0 | 46.0 | 6.0 | 101.0 | 57.0 |
| Mexico | 44.5 | 44.0 | 48.0 | 40.0 | 9.0 | 58.0 | 24.0 |
| South Africa | 44.5 | 46.0 | 41.0 | 44.0 | 22.0 | 72.0 | 31.5 |
| Kenya | 46.0 | 47.0 | 43.0 | 48.5 | 27.0 | 103.0 | 45.0 |
| Turkey | 47.0 | 42.0 | 47.0 | 54.5 | 17.0 | 56.0 | 33.0 |
| Nigeria | 48.0 | 53.0 | 42.0 | 51.0 | 7.0 | 93.0 | 64.0 |
| Uruguay | 49.0 | 39.0 | 94.5 | 48.5 | 100.0 | 40.0 | 64.0 |
| Iran | 50.0 | 55.0 | 49.0 | 53.0 | 16.0 | NaN | 34.5 |
In the follwing work with Ranks! Check whether GDP (Standard of Living), Total_Games (Political Stability measure) and Population (Size) have an effect on Total Medals. (hint: work with spearman correlation, not with pearson correlation)
medals_per_country
| Total | Gold | Silver | Bronze | Population | GDP | Total_Games | |
|---|---|---|---|---|---|---|---|
| Country | |||||||
| United States | 5238 | 2402 | 1571 | 1265 | 321418820.0 | 56115.718426 | 48 |
| Soviet Union | 2489 | 1088 | 724 | 677 | NaN | NaN | 18 |
| United Kingdom | 1799 | 580 | 632 | 587 | 65138232.0 | 43875.969614 | 42 |
| Germany | 1665 | 589 | 504 | 572 | 81413145.0 | 41313.313995 | 25 |
| France | 1548 | 444 | 526 | 578 | 66808385.0 | 36205.568102 | 47 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| Iraq | 1 | 0 | 0 | 1 | 36423395.0 | 4943.760388 | 1 |
| Macedonia | 1 | 0 | 0 | 1 | 2078453.0 | 4852.657848 | 1 |
| Mauritius | 1 | 0 | 0 | 1 | 1262605.0 | 9252.110724 | 1 |
| Niger | 1 | 0 | 0 | 1 | 19899120.0 | 358.958152 | 1 |
| Togo | 1 | 0 | 0 | 1 | 7304578.0 | 559.635877 | 1 |
145 rows × 7 columns
medals_per_country.drop(columns = ["Gold", "Silver", "Bronze"], inplace = True)
medals_per_country.corr(method = "pearson")
| Total | Population | GDP | Total_Games | |
|---|---|---|---|---|
| Total | 1.000000 | 0.206606 | 0.437131 | 0.712917 |
| Population | 0.206606 | 1.000000 | -0.089976 | 0.123700 |
| GDP | 0.437131 | -0.089976 | 1.000000 | 0.563540 |
| Total_Games | 0.712917 | 0.123700 | 0.563540 | 1.000000 |
medals_per_country.corr(method = "spearman")
| Total | Population | GDP | Total_Games | |
|---|---|---|---|---|
| Total | 1.000000 | 0.419755 | 0.458478 | 0.927611 |
| Population | 0.419755 | 1.000000 | -0.239097 | 0.402098 |
| GDP | 0.458478 | -0.239097 | 1.000000 | 0.498056 |
| Total_Games | 0.927611 | 0.402098 | 0.498056 | 1.000000 |
medals_per_country.rank(ascending = False).corr(method = "pearson")
| Total | Population | GDP | Total_Games | |
|---|---|---|---|---|
| Total | 1.000000 | 0.419634 | 0.458508 | 0.927611 |
| Population | 0.419634 | 1.000000 | -0.239567 | 0.401448 |
| GDP | 0.458508 | -0.239567 | 1.000000 | 0.497109 |
| Total_Games | 0.927611 | 0.401448 | 0.497109 | 1.000000 |
import scipy.stats as stats
Hypothesis 1: There is no relationship between Total Medals and Population
stats.spearmanr(medals_per_country.Total, medals_per_country.Population,
nan_policy = "omit")
SignificanceResult(statistic=0.41975527887063924, pvalue=8.132240868116897e-07)
r, p_value = stats.spearmanr(medals_per_country.Total, medals_per_country.Population,
nan_policy = "omit")
r
0.41975527887063924
round(p_value)
0
Reject Hypothesis 1 -> There is a significant (positive) relationship between Total Medals and Population
Hypothesis 2: There is no relationship between Total Medals and GDP per Capita
r, p_value = stats.spearmanr(medals_per_country.Total, medals_per_country.GDP,
nan_policy = "omit")
r
0.45847787983426685
p_value
1.5758169913239602e-07
Reject Hypothesis 2 -> There is a significant (positive) relationship between Total Medals and GDP per Capita
Hypothesis 3: There is no relationship between Total Medals and Participations
medals_per_country.head(50)
| Total | Population | GDP | Total_Games | |
|---|---|---|---|---|
| Country | ||||
| United States | 5238 | 3.214188e+08 | 56115.718426 | 48 |
| Soviet Union | 2489 | NaN | NaN | 18 |
| United Kingdom | 1799 | 6.513823e+07 | 43875.969614 | 42 |
| Germany | 1665 | 8.141314e+07 | 41313.313995 | 25 |
| France | 1548 | 6.680838e+07 | 36205.568102 | 47 |
| Italy | 1488 | 6.080208e+07 | 29957.804315 | 43 |
| Sweden | 1477 | 9.798871e+06 | 50579.673649 | 47 |
| Canada | 1274 | 3.585177e+07 | 43248.529909 | 47 |
| Australia | 1204 | 2.378117e+07 | 56310.962993 | 30 |
| Hungary | 1091 | 9.844686e+06 | 12363.543460 | 31 |
| Russia | 1031 | 1.440968e+08 | 9092.580536 | 11 |
| Norway | 1011 | 5.195921e+06 | 74400.369777 | 44 |
| East Germany | 987 | NaN | NaN | 11 |
| Netherlands | 973 | 1.693652e+07 | 44299.768085 | 39 |
| Finland | 890 | 5.482013e+06 | 42311.036231 | 46 |
| China | 889 | 1.371220e+09 | 8027.683810 | 15 |
| Japan | 851 | 1.269585e+08 | 32477.215145 | 32 |
| Switzerland | 665 | 8.286976e+06 | 80945.079219 | 46 |
| Romania | 642 | 1.983239e+07 | 8972.922518 | 19 |
| Korea, South | 616 | 5.061704e+07 | 27221.524051 | 22 |
| West Germany | 584 | NaN | NaN | 12 |
| Poland | 538 | 3.799949e+07 | 12554.547554 | 27 |
| Denmark | 512 | 5.676002e+06 | 51989.293471 | 27 |
| Czechoslovakia | 487 | NaN | NaN | 27 |
| Spain | 444 | 4.641827e+07 | 25831.582305 | 20 |
| Yugoslavia | 442 | NaN | NaN | 18 |
| Brazil | 431 | 2.078475e+08 | 8538.589975 | 18 |
| Austria | 426 | 8.611088e+06 | 43774.985174 | 46 |
| Belgium | 424 | 1.128572e+07 | 40324.027766 | 28 |
| Cuba | 410 | 1.138956e+07 | NaN | 14 |
| Bulgaria | 339 | 7.177991e+06 | 6993.477360 | 19 |
| Unified Team | 283 | NaN | NaN | 2 |
| Unified Team of Germany | 281 | NaN | NaN | 6 |
| Argentina | 259 | 4.341676e+07 | 13431.878340 | 18 |
| New Zealand | 191 | 4.595700e+06 | 37807.967276 | 21 |
| India | 184 | 1.311051e+09 | 1598.259034 | 17 |
| Ukraine | 184 | 4.519820e+07 | 2114.954716 | 9 |
| Greece | 148 | 1.082373e+07 | 18002.230578 | 18 |
| Czech Republic | 131 | 1.055122e+07 | 17548.338213 | 10 |
| Belarus | 128 | 9.513000e+06 | 5740.456495 | 11 |
| Jamaica | 127 | 2.725941e+06 | 5232.024583 | 14 |
| Croatia | 125 | 4.224404e+06 | 11535.829356 | 10 |
| Pakistan | 121 | 1.889249e+08 | 1434.696665 | 9 |
| Mexico | 106 | 1.270172e+08 | 9005.024265 | 19 |
| South Africa | 106 | 5.495692e+07 | 5723.973357 | 17 |
| Kenya | 93 | 4.605030e+07 | 1376.712829 | 11 |
| Turkey | 86 | 7.866583e+07 | 9125.687590 | 16 |
| Nigeria | 84 | 1.822020e+08 | 2640.290739 | 8 |
| Uruguay | 76 | 3.431555e+06 | 15573.900919 | 8 |
| Iran | 61 | 7.910927e+07 | NaN | 15 |
r, p_value = stats.spearmanr(medals_per_country.head(50).Total,
medals_per_country.head(50).Total_Games)
r
0.6818689415046658
p_value
4.982151209468708e-08
Reject Hypothesis 3 -> There is a significant (positive) relationship between Total Medals and Participations
Create the following Seaborn Heatmap with Medal Ranks for Top 50 Countries (Total Medals, Summer Games Medals, Winter Games Medals, Men, Women).
olympics
| Edition | Year | City | Sport | Discipline | Athlete | Code | Gender | Event | Medal | Country | Games | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Summer | 1896 | Athens | Aquatics | Swimming | HAJOS, Alfred | HUN | Men | 100M Freestyle | Gold | Hungary | 1896 Athens |
| 1 | Summer | 1896 | Athens | Aquatics | Swimming | HERSCHMANN, Otto | AUT | Men | 100M Freestyle | Silver | Austria | 1896 Athens |
| 2 | Summer | 1896 | Athens | Aquatics | Swimming | DRIVAS, Dimitrios | GRE | Men | 100M Freestyle For Sailors | Bronze | Greece | 1896 Athens |
| 3 | Summer | 1896 | Athens | Aquatics | Swimming | MALOKINIS, Ioannis | GRE | Men | 100M Freestyle For Sailors | Gold | Greece | 1896 Athens |
| 4 | Summer | 1896 | Athens | Aquatics | Swimming | CHASAPIS, Spiridon | GRE | Men | 100M Freestyle For Sailors | Silver | Greece | 1896 Athens |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 36926 | Winter | 2014 | Sochi | Skiing | Snowboard | JONES, Jenny | GBR | Women | Slopestyle | Bronze | United Kingdom | 2014 Sochi |
| 36927 | Winter | 2014 | Sochi | Skiing | Snowboard | ANDERSON, Jamie | USA | Women | Slopestyle | Gold | United States | 2014 Sochi |
| 36928 | Winter | 2014 | Sochi | Skiing | Snowboard | MALTAIS, Dominique | CAN | Women | Snowboard Cross | Silver | Canada | 2014 Sochi |
| 36929 | Winter | 2014 | Sochi | Skiing | Snowboard | SAMKOVA, Eva | CZE | Women | Snowboard Cross | Gold | Czech Republic | 2014 Sochi |
| 36930 | Winter | 2014 | Sochi | Skiing | Snowboard | TRESPEUCH, Chloe | FRA | Women | Snowboard Cross | Bronze | France | 2014 Sochi |
36931 rows × 12 columns
sum_vs_win = pd.crosstab(olympics.Country, olympics.Edition)
sum_vs_win
| Edition | Summer | Winter |
|---|---|---|
| Country | ||
| Afghanistan | 2 | 0 |
| Algeria | 15 | 0 |
| Argentina | 259 | 0 |
| Armenia | 11 | 0 |
| Australasia | 29 | 0 |
| ... | ... | ... |
| West Germany | 490 | 94 |
| West Indies Federation | 5 | 0 |
| Yugoslavia | 435 | 7 |
| Zambia | 2 | 0 |
| Zimbabwe | 23 | 0 |
145 rows × 2 columns
men_vs_women = pd.crosstab(olympics.Country, olympics.Gender)
men_vs_women
| Gender | Men | Women |
|---|---|---|
| Country | ||
| Afghanistan | 2 | 0 |
| Algeria | 12 | 3 |
| Argentina | 187 | 72 |
| Armenia | 11 | 0 |
| Australasia | 27 | 2 |
| ... | ... | ... |
| West Germany | 450 | 134 |
| West Indies Federation | 5 | 0 |
| Yugoslavia | 379 | 63 |
| Zambia | 2 | 0 |
| Zimbabwe | 0 | 23 |
145 rows × 2 columns
medals_by_cats = pd.concat([sum_vs_win, men_vs_women], axis = 1)
medals_by_cats
| Summer | Winter | Men | Women | |
|---|---|---|---|---|
| Country | ||||
| Afghanistan | 2 | 0 | 2 | 0 |
| Algeria | 15 | 0 | 12 | 3 |
| Argentina | 259 | 0 | 187 | 72 |
| Armenia | 11 | 0 | 11 | 0 |
| Australasia | 29 | 0 | 27 | 2 |
| ... | ... | ... | ... | ... |
| West Germany | 490 | 94 | 450 | 134 |
| West Indies Federation | 5 | 0 | 5 | 0 |
| Yugoslavia | 435 | 7 | 379 | 63 |
| Zambia | 2 | 0 | 2 | 0 |
| Zimbabwe | 23 | 0 | 0 | 23 |
145 rows × 4 columns
medals_by_cats["Total"] = medals_by_cats.Summer + medals_by_cats.Winter
medals_by_cats
| Summer | Winter | Men | Women | Total | |
|---|---|---|---|---|---|
| Country | |||||
| Afghanistan | 2 | 0 | 2 | 0 | 2 |
| Algeria | 15 | 0 | 12 | 3 | 15 |
| Argentina | 259 | 0 | 187 | 72 | 259 |
| Armenia | 11 | 0 | 11 | 0 | 11 |
| Australasia | 29 | 0 | 27 | 2 | 29 |
| ... | ... | ... | ... | ... | ... |
| West Germany | 490 | 94 | 450 | 134 | 584 |
| West Indies Federation | 5 | 0 | 5 | 0 | 5 |
| Yugoslavia | 435 | 7 | 379 | 63 | 442 |
| Zambia | 2 | 0 | 2 | 0 | 2 |
| Zimbabwe | 23 | 0 | 0 | 23 | 23 |
145 rows × 5 columns
medals_by_cats.sort_values("Total", ascending = False, inplace = True)
medals_by_cats
| Summer | Winter | Men | Women | Total | |
|---|---|---|---|---|---|
| Country | |||||
| United States | 4585 | 653 | 3618 | 1620 | 5238 |
| Soviet Union | 2049 | 440 | 1807 | 682 | 2489 |
| United Kingdom | 1720 | 79 | 1470 | 329 | 1799 |
| Germany | 1305 | 360 | 1119 | 546 | 1665 |
| France | 1396 | 152 | 1351 | 197 | 1548 |
| ... | ... | ... | ... | ... | ... |
| Iraq | 1 | 0 | 1 | 0 | 1 |
| Senegal | 1 | 0 | 1 | 0 | 1 |
| Macedonia | 1 | 0 | 1 | 0 | 1 |
| Mauritius | 1 | 0 | 1 | 0 | 1 |
| Guatemala | 1 | 0 | 1 | 0 | 1 |
145 rows × 5 columns
ranks = medals_by_cats.rank(ascending = False, method = "average")
ranks
| Summer | Winter | Men | Women | Total | |
|---|---|---|---|---|---|
| Country | |||||
| United States | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
| Soviet Union | 2.0 | 4.0 | 2.0 | 2.0 | 2.0 |
| United Kingdom | 3.0 | 19.0 | 3.0 | 11.0 | 3.0 |
| Germany | 5.0 | 7.0 | 7.0 | 4.0 | 4.0 |
| France | 4.0 | 14.0 | 4.0 | 16.5 | 5.0 |
| ... | ... | ... | ... | ... | ... |
| Iraq | 132.5 | 95.5 | 127.0 | 120.0 | 133.5 |
| Senegal | 132.5 | 95.5 | 127.0 | 120.0 | 133.5 |
| Macedonia | 132.5 | 95.5 | 127.0 | 120.0 | 133.5 |
| Mauritius | 132.5 | 95.5 | 127.0 | 120.0 | 133.5 |
| Guatemala | 132.5 | 95.5 | 127.0 | 120.0 | 133.5 |
145 rows × 5 columns
top_50 = ranks.head(50)
plt.figure(figsize = (50, 10))
sns.heatmap(top_50.T,cmap='RdYlGn_r',annot=True,fmt='2.0f')
plt.show()
Identify Countries that are
What could be the reasons?
top_50.sort_values("Summer")
| Summer | Winter | Men | Women | Total | |
|---|---|---|---|---|---|
| Country | |||||
| United States | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
| Soviet Union | 2.0 | 4.0 | 2.0 | 2.0 | 2.0 |
| United Kingdom | 3.0 | 19.0 | 3.0 | 11.0 | 3.0 |
| France | 4.0 | 14.0 | 4.0 | 16.5 | 5.0 |
| Germany | 5.0 | 7.0 | 7.0 | 4.0 | 4.0 |
| Italy | 6.0 | 11.0 | 5.0 | 18.0 | 6.0 |
| Australia | 7.0 | 27.0 | 12.0 | 5.0 | 9.0 |
| Hungary | 8.0 | 30.0 | 8.0 | 14.0 | 10.0 |
| Sweden | 9.0 | 6.0 | 6.0 | 16.5 | 7.0 |
| Netherlands | 10.0 | 15.0 | 14.0 | 9.0 | 14.0 |
| East Germany | 11.0 | 12.0 | 17.0 | 8.0 | 13.0 |
| China | 12.0 | 18.0 | 30.0 | 3.0 | 16.0 |
| Japan | 13.0 | 21.0 | 13.0 | 13.0 | 17.0 |
| Russia | 14.0 | 10.0 | 16.0 | 6.0 | 11.0 |
| Canada | 15.0 | 2.0 | 9.0 | 7.0 | 8.0 |
| Romania | 16.0 | 41.5 | 28.0 | 10.0 | 19.0 |
| Norway | 17.0 | 3.0 | 10.0 | 15.0 | 12.0 |
| Korea, South | 18.0 | 17.0 | 25.5 | 12.0 | 20.0 |
| Poland | 19.0 | 23.0 | 19.0 | 24.0 | 22.0 |
| Denmark | 20.0 | 38.5 | 20.0 | 29.5 | 23.0 |
| West Germany | 21.0 | 16.0 | 18.0 | 19.0 | 21.0 |
| Finland | 22.0 | 5.0 | 11.0 | 23.0 | 15.0 |
| Spain | 23.0 | 41.5 | 24.0 | 25.0 | 25.0 |
| Yugoslavia | 24.0 | 35.0 | 23.0 | 36.0 | 26.0 |
| Brazil | 25.0 | 95.5 | 29.0 | 20.0 | 27.0 |
| Belgium | 26.0 | 29.0 | 21.0 | 43.0 | 29.0 |
| Cuba | 27.0 | 95.5 | 27.0 | 26.0 | 30.0 |
| Switzerland | 28.0 | 8.0 | 15.0 | 28.0 | 18.0 |
| Bulgaria | 29.0 | 37.0 | 32.0 | 21.0 | 31.0 |
| Czechoslovakia | 30.0 | 13.0 | 22.0 | 31.0 | 24.0 |
| Unified Team of Germany | 31.0 | 24.0 | 31.0 | 35.0 | 33.0 |
| Argentina | 32.0 | 95.5 | 33.0 | 32.0 | 34.0 |
| Unified Team | 33.0 | 22.0 | 35.0 | 22.0 | 32.0 |
| New Zealand | 34.0 | 44.5 | 36.0 | 39.0 | 35.0 |
| India | 35.0 | 95.5 | 34.0 | 71.5 | 36.5 |
| Ukraine | 36.0 | 31.5 | 42.0 | 27.0 | 36.5 |
| Greece | 37.0 | 95.5 | 39.0 | 37.0 | 38.0 |
| Austria | 38.0 | 9.0 | 25.5 | 29.5 | 28.0 |
| Jamaica | 39.0 | 95.5 | 50.0 | 33.5 | 41.0 |
| Pakistan | 40.0 | 95.5 | 37.0 | 120.0 | 43.0 |
| Croatia | 41.0 | 31.5 | 38.0 | 53.5 | 42.0 |
| Belarus | 42.0 | 27.0 | 49.0 | 33.5 | 40.0 |
| Mexico | 43.5 | 95.5 | 41.0 | 46.0 | 44.5 |
| South Africa | 43.5 | 95.5 | 43.0 | 42.0 | 44.5 |
| Kenya | 45.0 | 95.5 | 44.0 | 51.5 | 46.0 |
| Turkey | 46.0 | 95.5 | 45.0 | 58.5 | 47.0 |
| Nigeria | 47.0 | 95.5 | 47.0 | 44.5 | 48.0 |
| Uruguay | 48.0 | 95.5 | 46.0 | 120.0 | 49.0 |
| Iran | 49.0 | 95.5 | 48.0 | 120.0 | 50.0 |
| Czech Republic | 51.0 | 20.0 | 40.0 | 38.0 | 39.0 |
top_50.sort_values("Winter")
| Summer | Winter | Men | Women | Total | |
|---|---|---|---|---|---|
| Country | |||||
| United States | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
| Canada | 15.0 | 2.0 | 9.0 | 7.0 | 8.0 |
| Norway | 17.0 | 3.0 | 10.0 | 15.0 | 12.0 |
| Soviet Union | 2.0 | 4.0 | 2.0 | 2.0 | 2.0 |
| Finland | 22.0 | 5.0 | 11.0 | 23.0 | 15.0 |
| Sweden | 9.0 | 6.0 | 6.0 | 16.5 | 7.0 |
| Germany | 5.0 | 7.0 | 7.0 | 4.0 | 4.0 |
| Switzerland | 28.0 | 8.0 | 15.0 | 28.0 | 18.0 |
| Austria | 38.0 | 9.0 | 25.5 | 29.5 | 28.0 |
| Russia | 14.0 | 10.0 | 16.0 | 6.0 | 11.0 |
| Italy | 6.0 | 11.0 | 5.0 | 18.0 | 6.0 |
| East Germany | 11.0 | 12.0 | 17.0 | 8.0 | 13.0 |
| Czechoslovakia | 30.0 | 13.0 | 22.0 | 31.0 | 24.0 |
| France | 4.0 | 14.0 | 4.0 | 16.5 | 5.0 |
| Netherlands | 10.0 | 15.0 | 14.0 | 9.0 | 14.0 |
| West Germany | 21.0 | 16.0 | 18.0 | 19.0 | 21.0 |
| Korea, South | 18.0 | 17.0 | 25.5 | 12.0 | 20.0 |
| China | 12.0 | 18.0 | 30.0 | 3.0 | 16.0 |
| United Kingdom | 3.0 | 19.0 | 3.0 | 11.0 | 3.0 |
| Czech Republic | 51.0 | 20.0 | 40.0 | 38.0 | 39.0 |
| Japan | 13.0 | 21.0 | 13.0 | 13.0 | 17.0 |
| Unified Team | 33.0 | 22.0 | 35.0 | 22.0 | 32.0 |
| Poland | 19.0 | 23.0 | 19.0 | 24.0 | 22.0 |
| Unified Team of Germany | 31.0 | 24.0 | 31.0 | 35.0 | 33.0 |
| Belarus | 42.0 | 27.0 | 49.0 | 33.5 | 40.0 |
| Australia | 7.0 | 27.0 | 12.0 | 5.0 | 9.0 |
| Belgium | 26.0 | 29.0 | 21.0 | 43.0 | 29.0 |
| Hungary | 8.0 | 30.0 | 8.0 | 14.0 | 10.0 |
| Ukraine | 36.0 | 31.5 | 42.0 | 27.0 | 36.5 |
| Croatia | 41.0 | 31.5 | 38.0 | 53.5 | 42.0 |
| Yugoslavia | 24.0 | 35.0 | 23.0 | 36.0 | 26.0 |
| Bulgaria | 29.0 | 37.0 | 32.0 | 21.0 | 31.0 |
| Denmark | 20.0 | 38.5 | 20.0 | 29.5 | 23.0 |
| Spain | 23.0 | 41.5 | 24.0 | 25.0 | 25.0 |
| Romania | 16.0 | 41.5 | 28.0 | 10.0 | 19.0 |
| New Zealand | 34.0 | 44.5 | 36.0 | 39.0 | 35.0 |
| India | 35.0 | 95.5 | 34.0 | 71.5 | 36.5 |
| Cuba | 27.0 | 95.5 | 27.0 | 26.0 | 30.0 |
| Greece | 37.0 | 95.5 | 39.0 | 37.0 | 38.0 |
| Brazil | 25.0 | 95.5 | 29.0 | 20.0 | 27.0 |
| Uruguay | 48.0 | 95.5 | 46.0 | 120.0 | 49.0 |
| Jamaica | 39.0 | 95.5 | 50.0 | 33.5 | 41.0 |
| Pakistan | 40.0 | 95.5 | 37.0 | 120.0 | 43.0 |
| Mexico | 43.5 | 95.5 | 41.0 | 46.0 | 44.5 |
| South Africa | 43.5 | 95.5 | 43.0 | 42.0 | 44.5 |
| Kenya | 45.0 | 95.5 | 44.0 | 51.5 | 46.0 |
| Turkey | 46.0 | 95.5 | 45.0 | 58.5 | 47.0 |
| Nigeria | 47.0 | 95.5 | 47.0 | 44.5 | 48.0 |
| Argentina | 32.0 | 95.5 | 33.0 | 32.0 | 34.0 |
| Iran | 49.0 | 95.5 | 48.0 | 120.0 | 50.0 |
plt.figure(figsize = (50, 10))
sns.heatmap(top_50.sort_values(by = "Summer").T,cmap='RdYlGn_r',annot=True,fmt='2.0f')
plt.show()
plt.figure(figsize = (50, 10))
sns.heatmap(top_50.sort_values(by = "Winter").T,cmap='RdYlGn_r',annot=True,fmt='2.0f')
plt.show()
top_50.Summer.sub(top_50.Winter).sort_values()
Country Brazil -70.5 Cuba -68.5 Argentina -63.5 India -60.5 Greece -58.5 Jamaica -56.5 Pakistan -55.5 South Africa -52.0 Mexico -52.0 Kenya -50.5 Turkey -49.5 Nigeria -48.5 Uruguay -47.5 Iran -46.5 Romania -25.5 Hungary -22.0 Australia -20.0 Denmark -18.5 Spain -18.5 United Kingdom -16.0 Yugoslavia -11.0 New Zealand -10.5 France -10.0 Japan -8.0 Bulgaria -8.0 China -6.0 Netherlands -5.0 Italy -5.0 Poland -4.0 Belgium -3.0 Soviet Union -2.0 Germany -2.0 East Germany -1.0 United States 0.0 Korea, South 1.0 Sweden 3.0 Russia 4.0 Ukraine 4.5 West Germany 5.0 Unified Team of Germany 7.0 Croatia 9.5 Unified Team 11.0 Canada 13.0 Norway 14.0 Belarus 15.0 Finland 17.0 Czechoslovakia 17.0 Switzerland 20.0 Austria 29.0 Czech Republic 31.0 dtype: float64
rank_diff = top_50.Summer.sub(top_50.Winter).sort_values().to_frame()
rank_diff
| 0 | |
|---|---|
| Country | |
| Brazil | -70.5 |
| Cuba | -68.5 |
| Argentina | -63.5 |
| India | -60.5 |
| Greece | -58.5 |
| Jamaica | -56.5 |
| Pakistan | -55.5 |
| South Africa | -52.0 |
| Mexico | -52.0 |
| Kenya | -50.5 |
| Turkey | -49.5 |
| Nigeria | -48.5 |
| Uruguay | -47.5 |
| Iran | -46.5 |
| Romania | -25.5 |
| Hungary | -22.0 |
| Australia | -20.0 |
| Denmark | -18.5 |
| Spain | -18.5 |
| United Kingdom | -16.0 |
| Yugoslavia | -11.0 |
| New Zealand | -10.5 |
| France | -10.0 |
| Japan | -8.0 |
| Bulgaria | -8.0 |
| China | -6.0 |
| Netherlands | -5.0 |
| Italy | -5.0 |
| Poland | -4.0 |
| Belgium | -3.0 |
| Soviet Union | -2.0 |
| Germany | -2.0 |
| East Germany | -1.0 |
| United States | 0.0 |
| Korea, South | 1.0 |
| Sweden | 3.0 |
| Russia | 4.0 |
| Ukraine | 4.5 |
| West Germany | 5.0 |
| Unified Team of Germany | 7.0 |
| Croatia | 9.5 |
| Unified Team | 11.0 |
| Canada | 13.0 |
| Norway | 14.0 |
| Belarus | 15.0 |
| Finland | 17.0 |
| Czechoslovakia | 17.0 |
| Switzerland | 20.0 |
| Austria | 29.0 |
| Czech Republic | 31.0 |
plt.figure(figsize = (35, 5))
sns.heatmap(rank_diff.T,cmap='RdBu',annot=True,fmt='2.0f', center = 0)
plt.show()
Identify Countries where
What could be the reasons?
plt.figure(figsize = (50, 10))
sns.heatmap(top_50.sort_values(by = "Men").T,cmap='RdYlGn_r',annot=True,fmt='2.0f')
plt.show()
plt.figure(figsize = (50, 10))
sns.heatmap(top_50.sort_values(by = "Women").T,cmap='RdYlGn_r',annot=True,fmt='2.0f')
plt.show()
rank_diff2 = top_50.Men.sub(top_50.Women).sort_values().to_frame()
rank_diff2
| 0 | |
|---|---|
| Country | |
| Pakistan | -83.0 |
| Uruguay | -74.0 |
| Iran | -72.0 |
| India | -37.5 |
| Belgium | -22.0 |
| Croatia | -15.5 |
| Turkey | -13.5 |
| Yugoslavia | -13.0 |
| Italy | -13.0 |
| Switzerland | -13.0 |
| France | -12.5 |
| Finland | -12.0 |
| Sweden | -10.5 |
| Denmark | -9.5 |
| Czechoslovakia | -9.0 |
| United Kingdom | -8.0 |
| Kenya | -7.5 |
| Hungary | -6.0 |
| Mexico | -5.0 |
| Poland | -5.0 |
| Norway | -5.0 |
| Unified Team of Germany | -4.0 |
| Austria | -4.0 |
| New Zealand | -3.0 |
| Spain | -1.0 |
| West Germany | -1.0 |
| United States | 0.0 |
| Soviet Union | 0.0 |
| Japan | 0.0 |
| South Africa | 1.0 |
| Cuba | 1.0 |
| Argentina | 1.0 |
| Canada | 2.0 |
| Czech Republic | 2.0 |
| Greece | 2.0 |
| Nigeria | 2.5 |
| Germany | 3.0 |
| Netherlands | 5.0 |
| Australia | 7.0 |
| East Germany | 9.0 |
| Brazil | 9.0 |
| Russia | 10.0 |
| Bulgaria | 11.0 |
| Unified Team | 13.0 |
| Korea, South | 13.5 |
| Ukraine | 15.0 |
| Belarus | 15.5 |
| Jamaica | 16.5 |
| Romania | 18.0 |
| China | 27.0 |
plt.figure(figsize = (35, 5))
sns.heatmap(rank_diff2.T,cmap='RdBu_r',annot=True,fmt='2.0f', center = 0)
plt.show()
Create the following Seaborn Heatmap that shows the Ranks of Top 50 Countries by Sports.
Identify traditional Sports / National Sports for e.g. UK and China!
olympics
| Edition | Year | City | Sport | Discipline | Athlete | Code | Gender | Event | Medal | Country | Games | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Summer | 1896 | Athens | Aquatics | Swimming | HAJOS, Alfred | HUN | Men | 100M Freestyle | Gold | Hungary | 1896 Athens |
| 1 | Summer | 1896 | Athens | Aquatics | Swimming | HERSCHMANN, Otto | AUT | Men | 100M Freestyle | Silver | Austria | 1896 Athens |
| 2 | Summer | 1896 | Athens | Aquatics | Swimming | DRIVAS, Dimitrios | GRE | Men | 100M Freestyle For Sailors | Bronze | Greece | 1896 Athens |
| 3 | Summer | 1896 | Athens | Aquatics | Swimming | MALOKINIS, Ioannis | GRE | Men | 100M Freestyle For Sailors | Gold | Greece | 1896 Athens |
| 4 | Summer | 1896 | Athens | Aquatics | Swimming | CHASAPIS, Spiridon | GRE | Men | 100M Freestyle For Sailors | Silver | Greece | 1896 Athens |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 36926 | Winter | 2014 | Sochi | Skiing | Snowboard | JONES, Jenny | GBR | Women | Slopestyle | Bronze | United Kingdom | 2014 Sochi |
| 36927 | Winter | 2014 | Sochi | Skiing | Snowboard | ANDERSON, Jamie | USA | Women | Slopestyle | Gold | United States | 2014 Sochi |
| 36928 | Winter | 2014 | Sochi | Skiing | Snowboard | MALTAIS, Dominique | CAN | Women | Snowboard Cross | Silver | Canada | 2014 Sochi |
| 36929 | Winter | 2014 | Sochi | Skiing | Snowboard | SAMKOVA, Eva | CZE | Women | Snowboard Cross | Gold | Czech Republic | 2014 Sochi |
| 36930 | Winter | 2014 | Sochi | Skiing | Snowboard | TRESPEUCH, Chloe | FRA | Women | Snowboard Cross | Bronze | France | 2014 Sochi |
36931 rows × 12 columns
olympics.Sport.value_counts()
Aquatics 4170 Athletics 3637 Rowing 2667 Gymnastics 2307 Skiing 1781 Fencing 1613 Ice Hockey 1563 Football 1497 Hockey 1422 Skating 1256 Wrestling 1211 Shooting 1150 Sailing 1109 Cycling 1105 Basketball 1012 Canoe / Kayak 1002 Volleyball 994 Handball 973 Equestrian 939 Boxing 894 Weightlifting 591 Judo 491 Bobsleigh 452 Biathlon 420 Baseball 335 Archery 329 Tennis 296 Rugby 192 Softball 180 Luge 180 Modern Pentathlon 180 Curling 172 Badminton 144 Table Tennis 144 Taekwondo 112 Tug of War 94 Canoe 81 Polo 66 Lacrosse 59 Golf 30 Triathlon 24 Cricket 24 Rackets 10 Croquet 8 Water Motorsports 5 Basque Pelota 4 Roque 3 Jeu de paume 3 Name: Sport, dtype: int64
sports = olympics.Sport.value_counts().index
sports
Index(['Aquatics', 'Athletics', 'Rowing', 'Gymnastics', 'Skiing', 'Fencing',
'Ice Hockey', 'Football', 'Hockey', 'Skating', 'Wrestling', 'Shooting',
'Sailing', 'Cycling', 'Basketball', 'Canoe / Kayak', 'Volleyball',
'Handball', 'Equestrian', 'Boxing', 'Weightlifting', 'Judo',
'Bobsleigh', 'Biathlon', 'Baseball', 'Archery', 'Tennis', 'Rugby',
'Softball', 'Luge', 'Modern Pentathlon', 'Curling', 'Badminton',
'Table Tennis', 'Taekwondo', 'Tug of War', 'Canoe', 'Polo', 'Lacrosse',
'Golf', 'Triathlon', 'Cricket', 'Rackets', 'Croquet',
'Water Motorsports', 'Basque Pelota', 'Roque', 'Jeu de paume'],
dtype='object')
olympics.Country.value_counts().head(20)
United States 5238 Soviet Union 2489 United Kingdom 1799 Germany 1665 France 1548 Italy 1488 Sweden 1477 Canada 1274 Australia 1204 Hungary 1091 Russia 1031 Norway 1011 East Germany 987 Netherlands 973 Finland 890 China 889 Japan 851 Switzerland 665 Romania 642 Korea, South 616 Name: Country, dtype: int64
top_20 = olympics.Country.value_counts().head(20).index
top_20
Index(['United States', 'Soviet Union', 'United Kingdom', 'Germany', 'France',
'Italy', 'Sweden', 'Canada', 'Australia', 'Hungary', 'Russia', 'Norway',
'East Germany', 'Netherlands', 'Finland', 'China', 'Japan',
'Switzerland', 'Romania', 'Korea, South'],
dtype='object')
by_sport = pd.crosstab(olympics.Country, olympics.Sport)
by_sport
| Sport | Aquatics | Archery | Athletics | Badminton | Baseball | Basketball | Basque Pelota | Biathlon | Bobsleigh | Boxing | ... | Softball | Table Tennis | Taekwondo | Tennis | Triathlon | Tug of War | Volleyball | Water Motorsports | Weightlifting | Wrestling |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Country | |||||||||||||||||||||
| Afghanistan | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| Algeria | 0 | 0 | 7 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| Argentina | 3 | 0 | 5 | 0 | 0 | 24 | 0 | 0 | 0 | 24 | ... | 0 | 0 | 1 | 6 | 0 | 0 | 12 | 0 | 2 | 0 |
| Armenia | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 6 |
| Australasia | 11 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| West Germany | 62 | 0 | 67 | 0 | 0 | 0 | 0 | 14 | 22 | 6 | ... | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 7 | 9 |
| West Indies Federation | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| Yugoslavia | 91 | 0 | 2 | 0 | 0 | 96 | 0 | 0 | 0 | 11 | ... | 0 | 4 | 0 | 0 | 0 | 0 | 24 | 0 | 0 | 16 |
| Zambia | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| Zimbabwe | 7 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
145 rows × 48 columns
by_sport = by_sport.rank(ascending = False, method = "average")
by_sport
| Sport | Aquatics | Archery | Athletics | Badminton | Baseball | Basketball | Basque Pelota | Biathlon | Bobsleigh | Boxing | ... | Softball | Table Tennis | Taekwondo | Tennis | Triathlon | Tug of War | Volleyball | Water Motorsports | Weightlifting | Wrestling |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Country | |||||||||||||||||||||
| Afghanistan | 101.0 | 84.0 | 122.0 | 78.0 | 76.0 | 83.0 | 74.0 | 84.5 | 80.5 | 112.0 | ... | 75.0 | 79.0 | 19.5 | 90.5 | 79.0 | 76.5 | 86.0 | 74.0 | 103.5 | 103.0 |
| Algeria | 101.0 | 84.0 | 47.0 | 78.0 | 76.0 | 83.0 | 74.0 | 84.5 | 80.5 | 35.0 | ... | 75.0 | 79.0 | 89.5 | 90.5 | 79.0 | 76.5 | 86.0 | 74.0 | 103.5 | 103.0 |
| Argentina | 41.0 | 84.0 | 53.5 | 78.0 | 76.0 | 11.5 | 74.0 | 84.5 | 80.5 | 9.0 | ... | 75.0 | 79.0 | 28.0 | 12.0 | 79.0 | 76.5 | 17.0 | 74.0 | 45.5 | 103.0 |
| Armenia | 101.0 | 84.0 | 122.0 | 78.0 | 76.0 | 83.0 | 74.0 | 84.5 | 80.5 | 70.5 | ... | 75.0 | 79.0 | 89.5 | 90.5 | 79.0 | 76.5 | 86.0 | 74.0 | 35.5 | 40.0 |
| Australasia | 30.5 | 84.0 | 88.0 | 78.0 | 76.0 | 83.0 | 74.0 | 84.5 | 80.5 | 70.5 | ... | 75.0 | 79.0 | 89.5 | 31.0 | 79.0 | 76.5 | 86.0 | 74.0 | 103.5 | 103.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| West Germany | 18.0 | 84.0 | 16.0 | 78.0 | 76.0 | 83.0 | 74.0 | 9.0 | 6.5 | 35.0 | ... | 75.0 | 79.0 | 89.5 | 21.0 | 79.0 | 76.5 | 86.0 | 74.0 | 28.5 | 32.0 |
| West Indies Federation | 101.0 | 84.0 | 53.5 | 78.0 | 76.0 | 83.0 | 74.0 | 84.5 | 80.5 | 112.0 | ... | 75.0 | 79.0 | 89.5 | 90.5 | 79.0 | 76.5 | 86.0 | 74.0 | 103.5 | 103.0 |
| Yugoslavia | 16.0 | 84.0 | 70.0 | 78.0 | 76.0 | 3.0 | 74.0 | 84.5 | 80.5 | 25.5 | ... | 75.0 | 5.5 | 89.5 | 90.5 | 79.0 | 76.5 | 12.0 | 74.0 | 103.5 | 20.0 |
| Zambia | 101.0 | 84.0 | 88.0 | 78.0 | 76.0 | 83.0 | 74.0 | 84.5 | 80.5 | 70.5 | ... | 75.0 | 79.0 | 89.5 | 90.5 | 79.0 | 76.5 | 86.0 | 74.0 | 103.5 | 103.0 |
| Zimbabwe | 33.0 | 84.0 | 122.0 | 78.0 | 76.0 | 83.0 | 74.0 | 84.5 | 80.5 | 112.0 | ... | 75.0 | 79.0 | 89.5 | 90.5 | 79.0 | 76.5 | 86.0 | 74.0 | 103.5 | 103.0 |
145 rows × 48 columns
by_sport = by_sport.loc[top_20, sports].copy()
by_sport
| Aquatics | Athletics | Rowing | Gymnastics | Skiing | Fencing | Ice Hockey | Football | Hockey | Skating | ... | Lacrosse | Golf | Triathlon | Cricket | Rackets | Croquet | Water Motorsports | Basque Pelota | Roque | Jeu de paume | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| United States | 1.0 | 1.0 | 1.0 | 2.0 | 8.0 | 7.0 | 2.0 | 1.0 | 12.0 | 1.0 | ... | 3.0 | 1.0 | 9.5 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 1.0 | 2.0 |
| Soviet Union | 4.0 | 3.0 | 7.0 | 1.0 | 5.0 | 4.0 | 5.0 | 3.0 | 11.0 | 4.0 | ... | 74.5 | 74.5 | 79.0 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 73.5 | 74.0 |
| United Kingdom | 10.5 | 2.0 | 2.0 | 19.0 | 34.5 | 15.0 | 10.0 | 20.0 | 5.0 | 16.0 | ... | 2.0 | 2.0 | 5.0 | 1.5 | 1.0 | 73.5 | 1.0 | 74.0 | 73.5 | 1.0 |
| Germany | 5.0 | 7.0 | 5.0 | 16.0 | 6.0 | 6.0 | 13.0 | 7.5 | 4.0 | 9.0 | ... | 74.5 | 74.5 | 5.0 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 73.5 | 74.0 |
| France | 14.0 | 8.0 | 11.0 | 17.0 | 10.0 | 2.0 | 79.5 | 23.0 | 83.0 | 17.0 | ... | 74.5 | 74.5 | 79.0 | 1.5 | 73.5 | 1.0 | 2.0 | 1.5 | 73.5 | 74.0 |
| Italy | 12.0 | 14.0 | 8.0 | 8.5 | 9.0 | 1.0 | 79.5 | 7.5 | 83.0 | 11.5 | ... | 74.5 | 74.5 | 79.0 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 73.5 | 74.0 |
| Sweden | 15.0 | 9.0 | 30.0 | 5.0 | 4.0 | 12.0 | 3.0 | 13.5 | 83.0 | 15.0 | ... | 74.5 | 74.5 | 9.5 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 73.5 | 74.0 |
| Canada | 13.0 | 12.5 | 3.0 | 30.5 | 12.0 | 90.0 | 1.0 | 22.0 | 83.0 | 2.0 | ... | 1.0 | 3.0 | 5.0 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 73.5 | 74.0 |
| Australia | 2.0 | 12.5 | 9.0 | 37.0 | 21.5 | 90.0 | 79.5 | 90.5 | 2.0 | 24.0 | ... | 74.5 | 74.5 | 1.0 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 73.5 | 74.0 |
| Hungary | 3.0 | 18.5 | 32.0 | 7.0 | 91.5 | 3.0 | 79.5 | 4.0 | 83.0 | 20.0 | ... | 74.5 | 74.5 | 79.0 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 73.5 | 74.0 |
| Russia | 8.0 | 10.0 | 23.5 | 8.5 | 11.0 | 8.5 | 9.0 | 90.5 | 83.0 | 7.0 | ... | 74.5 | 74.5 | 79.0 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 73.5 | 74.0 |
| Norway | 46.5 | 34.0 | 15.0 | 10.0 | 1.0 | 31.5 | 79.5 | 10.0 | 83.0 | 5.5 | ... | 74.5 | 74.5 | 79.0 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 73.5 | 74.0 |
| East Germany | 9.0 | 4.0 | 4.0 | 13.0 | 14.0 | 31.5 | 79.5 | 9.0 | 83.0 | 10.0 | ... | 74.5 | 74.5 | 79.0 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 73.5 | 74.0 |
| Netherlands | 10.5 | 31.5 | 10.0 | 26.0 | 34.5 | 15.0 | 79.5 | 16.5 | 1.0 | 3.0 | ... | 74.5 | 74.5 | 79.0 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 73.5 | 74.0 |
| Finland | 37.5 | 5.0 | 25.0 | 11.0 | 2.0 | 90.0 | 4.0 | 90.5 | 83.0 | 13.5 | ... | 74.5 | 74.5 | 79.0 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 73.5 | 74.0 |
| China | 6.0 | 28.5 | 22.0 | 6.0 | 25.0 | 13.0 | 79.5 | 33.0 | 14.5 | 8.0 | ... | 74.5 | 74.5 | 79.0 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 73.5 | 74.0 |
| Japan | 7.0 | 26.5 | 94.5 | 3.0 | 13.0 | 25.0 | 79.5 | 18.5 | 19.5 | 13.5 | ... | 74.5 | 74.5 | 79.0 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 73.5 | 74.0 |
| Switzerland | 53.5 | 45.0 | 13.0 | 14.0 | 7.0 | 17.0 | 7.0 | 25.0 | 83.0 | 28.0 | ... | 74.5 | 74.5 | 2.0 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 73.5 | 74.0 |
| Romania | 32.0 | 22.0 | 6.0 | 4.0 | 91.5 | 11.0 | 79.5 | 90.5 | 83.0 | 89.5 | ... | 74.5 | 74.5 | 79.0 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 73.5 | 74.0 |
| Korea, South | 37.5 | 70.0 | 94.5 | 28.5 | 91.5 | 18.0 | 79.5 | 29.5 | 10.0 | 5.5 | ... | 74.5 | 74.5 | 79.0 | 74.0 | 73.5 | 73.5 | 74.0 | 74.0 | 73.5 | 74.0 |
20 rows × 48 columns
plt.figure(figsize = (30, 10))
sns.heatmap(by_sport,cmap='RdYlGn_r', vmin = 1, vmax = 6, linewidth = 1)
plt.show()
{'URS': 'Soviet Union',
'GDR': 'East Germany',
'ROU': 'Romania',
'FRG': 'West Germany',
'TCH': 'Czechoslovakia',
'YUG': 'Yugoslavia',
'EUN': 'Unified Team',
'EUA': 'Unified Team of Germany',
'ZZX': 'Mixed teams',
'SRB': 'Serbia',
'ANZ': 'Australasia',
'RU1': 'Russian Empire',
'MNE': 'Montenegro',
'TTO': 'Trinidad and Tobago',
'BOH': 'Bohemia',
'BWI': 'West Indies Federation',
'SGP': 'Singapore',
'IOP': 'Independent Olympic Participants'}